# Source listing: transformations.py (PSyclone 2.5.0)
# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2017-2024, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Authors R. W. Ford, A. R. Porter, S. Siso and N. Nobre, STFC Daresbury Lab
# Modified by A. B. G. Chalk, STFC Daresbury Lab
# Modified by J. Henrichs, Bureau of Meteorology
# Modified I. Kavcic, J. G. Wallwork, O. Brunt and L. Turner, Met Office
38 
''' This module provides the various transformations that can be applied to
    PSyIR nodes. There are both general and API-specific transformation
    classes in this module where the latter typically apply API-specific
    checks before calling the base class for the actual transformation. '''

# pylint: disable=too-many-lines
45 
import abc

from psyclone import psyGen
from psyclone.configuration import Config
from psyclone.core import Signature, VariablesAccessInfo
from psyclone.domain.lfric import (KernCallArgList, LFRicConstants, LFRicKern,
                                   LFRicLoop)
from psyclone.dynamo0p3 import (LFRicHaloExchangeEnd, LFRicHaloExchangeStart,
                                DynInvokeSchedule)
from psyclone.errors import InternalError
from psyclone.gocean1p0 import GOInvokeSchedule
from psyclone.nemo import NemoInvokeSchedule
from psyclone.psyGen import (Transformation, CodedKern, Kern, InvokeSchedule,
                             BuiltIn)
from psyclone.psyir.nodes import (
    ACCDataDirective, ACCDirective, ACCEnterDataDirective, ACCKernelsDirective,
    ACCLoopDirective, ACCParallelDirective, ACCRoutineDirective, Assignment,
    Call, CodeBlock, Directive, Loop, Node, OMPDeclareTargetDirective,
    OMPDirective, OMPMasterDirective,
    OMPParallelDirective, OMPParallelDoDirective, OMPSerialDirective,
    OMPSingleDirective, OMPTaskloopDirective, PSyDataNode, Reference,
    Return, Routine, Schedule)
from psyclone.psyir.nodes.array_mixin import ArrayMixin
from psyclone.psyir.nodes.structure_member import StructureMember
from psyclone.psyir.nodes.structure_reference import StructureReference
from psyclone.psyir.symbols import (
    ArgumentInterface, DataSymbol, UnresolvedType, INTEGER_TYPE, ScalarType,
    Symbol, SymbolError)
from psyclone.psyir.transformations.loop_trans import LoopTrans
from psyclone.psyir.transformations.omp_loop_trans import OMPLoopTrans
from psyclone.psyir.transformations.parallel_loop_trans import \
    ParallelLoopTrans
from psyclone.psyir.transformations.region_trans import RegionTrans
from psyclone.psyir.transformations.transformation_error import \
    TransformationError
82 
def check_intergrid(node):
    '''
    Utility function to check that the supplied node does not have
    an intergrid kernel amongst its descendants.

    This is used ensure any attempt to apply loop-fusion and redundant-
    computation transformations to loops containing inter-grid kernels is
    rejected (since support for those is not yet implemented).

    :param node: the PSyIR node to check.
    :type node: :py:class:`psyir.nodes.Node`

    :raises TransformationError: if the supplied node has an inter-grid
        kernel as a descendant.

    '''
    # A leaf node cannot contain any kernels at all.
    if not node.children:
        return
    # Gather every inter-grid kernel below this node and reject the first one.
    offenders = [kern for kern in node.walk(LFRicKern) if kern.is_intergrid]
    if offenders:
        raise TransformationError(
            f"This Transformation cannot currently be applied to nodes "
            f"which have inter-grid kernels as descendents and "
            f"{offenders[0].name} is such a kernel.")
108 
109 
class OMPTaskloopTrans(ParallelLoopTrans):
    '''
    Adds an OpenMP taskloop directive to a loop. Only one of grainsize or
    num_tasks must be specified.

    TODO: #1364 Taskloops do not yet support reduction clauses.

    :param grainsize: the grainsize to use in for this transformation.
    :type grainsize: int or None
    :param num_tasks: the num_tasks to use for this transformation.
    :type num_tasks: int or None
    :param bool nogroup: whether or not to use a nogroup clause for this
        transformation. Default is False.

    For example:

    >>> from pysclone.parse.algorithm import parse
    >>> from psyclone.psyGen import PSyFactory
    >>> api = "gocean1.0"
    >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
    >>> psy = PSyFactory(api).create(invokeInfo)
    >>>
    >>> from psyclone.transformations import OMPParallelTrans, OMPSingleTrans
    >>> from psyclone.transformations import OMPTaskloopTrans
    >>> from psyclone.psyir.transformations import OMPTaskwaitTrans
    >>> singletrans = OMPSingleTrans()
    >>> paralleltrans = OMPParallelTrans()
    >>> tasklooptrans = OMPTaskloopTrans()
    >>> taskwaittrans = OMPTaskwaitTrans()
    >>>
    >>> schedule = psy.invokes.get('invoke_0').schedule
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())
    >>>
    >>> # Apply the OpenMP Taskloop transformation to *every* loop
    >>> # in the schedule.
    >>> # This ignores loop dependencies. These can be handled
    >>> # by the OMPTaskwaitTrans
    >>> for child in schedule.children:
    >>>     tasklooptrans.apply(child)
    >>> # Enclose all of these loops within a single OpenMP
    >>> # SINGLE region
    >>> singletrans.apply(schedule.children)
    >>> # Enclose all of these loops within a single OpenMP
    >>> # PARALLEL region
    >>> paralleltrans.apply(schedule.children)
    >>> # Ensure loop dependencies are satisfied
    >>> taskwaittrans.apply(schedule.children)
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())

    '''
    def __init__(self, grainsize=None, num_tasks=None, nogroup=False):
        # Initialise the backing attributes first so the property setters
        # (which cross-check grainsize against num_tasks) can run safely.
        self._grainsize = None
        self._num_tasks = None
        self.omp_grainsize = grainsize
        self.omp_num_tasks = num_tasks
        self.omp_nogroup = nogroup
        super().__init__()

    def __str__(self):
        return "Adds an 'OpenMP TASKLOOP' directive to a loop"

    @property
    def omp_nogroup(self):
        '''
        Returns whether the nogroup clause should be specified for
        this transformation. By default the nogroup clause is applied.

        :returns: whether the nogroup clause should be specified by
                  this transformation.
        :rtype: bool
        '''
        return self._nogroup

    @omp_nogroup.setter
    def omp_nogroup(self, nogroup):
        '''
        Sets whether the nogroup clause should be specified for this
        transformation.

        :param bool nogroup: value to set whether the nogroup clause should be
                             used for this transformation.

        :raises TypeError: if the nogroup parameter is not a bool.
        '''
        if not isinstance(nogroup, bool):
            raise TypeError(f"Expected nogroup to be a bool "
                            f"but got a {type(nogroup).__name__}")
        self._nogroup = nogroup

    @property
    def omp_grainsize(self):
        '''
        Returns the grainsize that will be specified by
        this transformation. By default the grainsize
        clause is not applied, so grainsize is None.

        :returns: The grainsize specified by this transformation.
        :rtype: int or None
        '''
        return self._grainsize

    @omp_grainsize.setter
    def omp_grainsize(self, value):
        '''
        Sets the grainsize that will be specified by
        this transformation. Checks the grainsize is
        a positive integer value or None.

        :param value: integer value to use in the grainsize clause.
        :type value: int or None

        :raises TransformationError: if value is not an int and is not None.
        :raises TransformationError: if value is negative.
        :raises TransformationError: if grainsize and num_tasks are
            both specified.
        '''
        if (not isinstance(value, int)) and (value is not None):
            raise TransformationError(f"grainsize must be an integer or None, "
                                      f"got {type(value).__name__}")

        if (value is not None) and (value <= 0):
            raise TransformationError(f"grainsize must be a positive "
                                      f"integer, got {value}")

        # OpenMP forbids combining grainsize with num_tasks on a taskloop.
        if value is not None and self.omp_num_tasks is not None:
            raise TransformationError(
                "The grainsize and num_tasks clauses would both "
                "be specified for this Taskloop transformation")
        self._grainsize = value

    @property
    def omp_num_tasks(self):
        '''
        Returns the num_tasks that will be specified
        by this transformation. By default the num_tasks
        clause is not applied so num_tasks is None.

        :returns: The num_tasks specified by this transformation.
        :rtype: int or None
        '''
        return self._num_tasks

    @omp_num_tasks.setter
    def omp_num_tasks(self, value):
        '''
        Sets the num_tasks that will be specified by
        this transformation. Checks that num_tasks is
        a positive integer value or None.

        :param value: integer value to use in the num_tasks clause.
        :type value: int or None

        :raises TransformationError: if value is not an int and is not None.
        :raises TransformationError: if value is negative.
        :raises TransformationError: if grainsize and num_tasks are
            both specified.

        '''
        if (not isinstance(value, int)) and (value is not None):
            raise TransformationError(f"num_tasks must be an integer or None,"
                                      f" got {type(value).__name__}")

        if (value is not None) and (value <= 0):
            raise TransformationError(f"num_tasks must be a positive "
                                      f"integer, got {value}")

        # OpenMP forbids combining grainsize with num_tasks on a taskloop.
        if value is not None and self.omp_grainsize is not None:
            raise TransformationError(
                "The grainsize and num_tasks clauses would both "
                "be specified for this Taskloop transformation")
        self._num_tasks = value

    def _directive(self, children, collapse=None):
        '''
        Creates the type of directive needed for this sub-class of
        transformation.

        :param children: list of Nodes that will be the children of
            the created directive.
        :type children: list of :py:class:`psyclone.psyir.nodes.Node`
        :param int collapse: currently un-used but required to keep
            interface the same as in base class.
        :returns: the new node representing the directive in the AST.
        :rtype: :py:class:`psyclone.psyir.nodes.OMPTaskloopDirective`

        :raises NotImplementedError: if a collapse argument is supplied
        '''
        # TODO 1370: OpenMP loop functions don't support collapse
        if collapse:
            raise NotImplementedError(
                "The COLLAPSE clause is not yet supported for "
                "'!$omp taskloop' directives.")
        _directive = OMPTaskloopDirective(children=children,
                                          grainsize=self.omp_grainsize,
                                          num_tasks=self.omp_num_tasks,
                                          nogroup=self.omp_nogroup)
        return _directive

    def apply(self, node, options=None):
        '''Apply the OMPTaskloopTrans transformation to the specified node in
        a Schedule. This node must be a Loop since this transformation
        corresponds to wrapping the generated code with directives like so:

        .. code-block:: fortran

          !$OMP TASKLOOP
          do ...
             ...
          end do
          !$OMP END TASKLOOP

        At code-generation time (when
        :py:meth:`OMPTaskloopDirective.gen_code` is called), this node must be
        within (i.e. a child of) an OpenMP SERIAL region.

        If the keyword "nogroup" is specified in the options, it will cause a
        nogroup clause be generated if it is set to True. This will override
        the value supplied to the constructor, but will only apply to the
        apply call to which the value is supplied.

        :param node: the supplied node to which we will apply the
            OMPTaskloopTrans transformation
        :type node: :py:class:`psyclone.psyir.nodes.Node`
        :param options: a dictionary with options for transformations
            and validation.
        :type options: Optional[Dict[str, Any]]
        :param bool options["nogroup"]:
                indicating whether a nogroup clause should be applied to
                this taskloop.

        '''
        if not options:
            options = {}
        current_nogroup = self.omp_nogroup
        # If nogroup is specified it overrides that supplied to the
        # constructor of the Transformation, but will be reset at the
        # end of this function
        self.omp_nogroup = options.get("nogroup", current_nogroup)

        try:
            super().apply(node, options)
        finally:
            # Reset the nogroup value to the original value
            self.omp_nogroup = current_nogroup
356 
357 
class MarkRoutineForGPUMixin:
    ''' This Mixin provides the "validate_it_can_run_on_gpu" method that
    given a routine or kernel node, it checks that the callee code is valid
    to run on a GPU. It is implemented as a Mixin because transformations
    from multiple programming models, e.g. OpenMP and OpenACC, can reuse
    the same logic.

    '''
    def validate_it_can_run_on_gpu(self, node, options):
        '''
        Check that the supplied node can be marked as available to be
        called on GPU.

        :param node: the kernel or routine to validate.
        :type node: :py:class:`psyclone.psyGen.Kern` |
                    :py:class:`psyclone.psyir.nodes.Routine`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]
        :param bool options["force"]: whether to allow routines with
            CodeBlocks to run on the GPU.

        :raises TransformationError: if the node is not a kernel or a routine.
        :raises TransformationError: if the target is a built-in kernel.
        :raises TransformationError: if it is a kernel but without an
            associated PSyIR.
        :raises TransformationError: if any of the symbols in the kernel are
            accessed via a module use statement.
        :raises TransformationError: if the kernel contains any calls to other
            routines.
        '''
        force = options.get("force", False) if options else False

        if not isinstance(node, (Kern, Routine)):
            raise TransformationError(
                f"The {type(self).__name__} must be applied to a sub-class of "
                f"Kern or Routine but got '{type(node).__name__}'.")

        # If it is a kernel call it must have an accessible implementation
        if isinstance(node, BuiltIn):
            raise TransformationError(
                f"Applying {type(self).__name__} to a built-in kernel is not "
                f"yet supported and kernel '{node.name}' is of type "
                f"'{type(node).__name__}'")

        if isinstance(node, Kern):
            # Get the PSyIR routine from the associated kernel. If there is an
            # exception (this could mean that there is no associated tree
            # or that the frontend failed to convert it into PSyIR) reraise it
            # as a TransformationError
            try:
                kernel_schedule = node.get_kernel_schedule()
            except Exception as error:
                raise TransformationError(
                    f"Failed to create PSyIR for kernel '{node.name}'. "
                    f"Cannot transform such a kernel.") from error
            k_or_r = "Kernel"
        else:
            # Supplied node is a PSyIR Routine which *is* a Schedule.
            kernel_schedule = node
            k_or_r = "routine"

        # Check that the routine does not access any data that is imported via
        # a 'use' statement.
        # TODO #2271 - this implementation will not catch symbols from literal
        # precisions or intialisation expressions.
        refs = kernel_schedule.walk(Reference)
        for ref in refs:
            if ref.symbol.is_import:
                # resolve_type does nothing if the Symbol type is known.
                try:
                    ref.symbol.resolve_type()
                except SymbolError:
                    # TODO #11 - log that we failed to resolve this Symbol.
                    pass
                if (isinstance(ref.symbol, DataSymbol) and
                        ref.symbol.is_constant):
                    # An import of a compile-time constant is fine.
                    continue
                raise TransformationError(
                    f"{k_or_r} '{node.name}' accesses the symbol "
                    f"'{ref.symbol}' which is imported. If this symbol "
                    f"represents data then it must first be converted to a "
                    f"{k_or_r} argument using the KernelImportsToArguments "
                    f"transformation.")

        # We forbid CodeBlocks because we can't be certain that what they
        # contain can be executed on a GPU. However, we do permit the user
        # to override this check.
        cblocks = kernel_schedule.walk(CodeBlock)
        if not force:
            if cblocks:
                cblock_txt = ("\n  " + "\n  ".join(str(node) for node in
                                                   cblocks[0].get_ast_nodes)
                              + "\n")
                option_txt = "options={'force': True}"
                raise TransformationError(
                    f"Cannot safely apply {type(self).__name__} to {k_or_r} "
                    f"'{node.name}' because its PSyIR contains one or more "
                    f"CodeBlocks:{cblock_txt}You may use '{option_txt}' to "
                    f"override this check.")
        else:
            # Check any accesses within CodeBlocks.
            # TODO #2271 - this will be handled as part of the checking to be
            # implemented using the dependence analysis.
            for cblock in cblocks:
                names = cblock.get_symbol_names()
                for name in names:
                    sym = kernel_schedule.symbol_table.lookup(name)
                    if sym.is_import:
                        raise TransformationError(
                            f"{k_or_r} '{node.name}' accesses the symbol "
                            f"'{sym.name}' within a CodeBlock and this symbol "
                            f"is imported. {type(self).__name__} cannot be "
                            f"applied to such a {k_or_r}.")

        calls = kernel_schedule.walk(Call)
        for call in calls:
            if not call.is_available_on_device():
                call_str = call.debug_string().rstrip("\n")
                raise TransformationError(
                    f"{k_or_r} '{node.name}' calls another routine "
                    f"'{call_str}' which is not available on the "
                    f"accelerator device and therefore cannot have "
                    f"{type(self).__name__} applied to it (TODO #342).")
482 
483 
class OMPDeclareTargetTrans(Transformation, MarkRoutineForGPUMixin):
    '''
    Adds an OpenMP declare target directive to the specified routine.

    For example:

    >>> from psyclone.psyir.frontend.fortran import FortranReader
    >>> from psyclone.psyir.nodes import Loop
    >>> from psyclone.transformations import OMPDeclareTargetTrans
    >>>
    >>> tree = FortranReader().psyir_from_source("""
    ...     subroutine my_subroutine(A)
    ...         integer, dimension(10, 10), intent(inout) :: A
    ...         integer :: i
    ...         integer :: j
    ...         do i = 1, 10
    ...             do j = 1, 10
    ...                 A(i, j) = 0
    ...             end do
    ...         end do
    ...     end subroutine
    ...     """
    >>> omptargettrans = OMPDeclareTargetTrans()
    >>> omptargettrans.apply(tree.walk(Routine)[0])

    will generate:

    .. code-block:: fortran

        subroutine my_subroutine(A)
            integer, dimension(10, 10), intent(inout) :: A
            integer :: i
            integer :: j
            !$omp declare target
            do i = 1, 10
                do j = 1, 10
                    A(i, j) = 0
                end do
            end do
        end subroutine

    '''
    def apply(self, node, options=None):
        ''' Insert an OMPDeclareTargetDirective inside the provided routine.

        :param node: the PSyIR routine to insert the directive into.
        :type node: :py:class:`psyclone.psyir.nodes.Routine`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]

        '''
        self.validate(node, options)
        for child in node.children:
            if isinstance(child, OMPDeclareTargetDirective):
                return  # The routine is already marked with OMPDeclareTarget
        node.children.insert(0, OMPDeclareTargetDirective())

    def validate(self, node, options=None):
        ''' Check that an OMPDeclareTargetDirective can be inserted.

        :param node: the kernel or routine which is the target of this
            transformation.
        :type node: :py:class:`psyclone.psyGen.Kern` |
                    :py:class:`psyclone.psyir.nodes.Routine`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]
        :param bool options["force"]: whether to allow routines with
            CodeBlocks to run on the GPU.

        :raises TransformationError: if the node is not a kernel or a routine.
        :raises TransformationError: if the target is a built-in kernel.
        :raises TransformationError: if it is a kernel but without an
            associated PSyIR.
        :raises TransformationError: if any of the symbols in the kernel are
            accessed via a module use statement.
        :raises TransformationError: if the kernel contains any calls to other
            routines.

        '''
        super().validate(node, options=options)

        # Delegate the GPU-suitability checks to the shared mixin logic.
        self.validate_it_can_run_on_gpu(node, options)
566 
567 
class ACCLoopTrans(ParallelLoopTrans):
    '''
    Adds an OpenACC loop directive to a loop. This directive must be within
    the scope of some OpenACC Parallel region (at code-generation time).

    For example:

    >>> from psyclone.parse.algorithm import parse
    >>> from psyclone.parse.utils import ParseError
    >>> from psyclone.psyGen import PSyFactory
    >>> from psyclone.errors import GenerationError
    >>> api = "gocean1.0"
    >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
    >>> psy = PSyFactory(api).create(invokeInfo)
    >>>
    >>> from psyclone.psyGen import TransInfo
    >>> t = TransInfo()
    >>> ltrans = t.get_trans_name('ACCLoopTrans')
    >>> rtrans = t.get_trans_name('ACCParallelTrans')
    >>>
    >>> schedule = psy.invokes.get('invoke_0').schedule
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())
    >>>
    >>> # Apply the OpenACC Loop transformation to *every* loop in the schedule
    >>> for child in schedule.children[:]:
    ...     ltrans.apply(child)
    >>>
    >>> # Enclose all of these loops within a single OpenACC parallel region
    >>> rtrans.apply(schedule)
    >>>

    '''
    # The types of node that must be excluded from the section of PSyIR
    # being transformed.
    excluded_node_types = (PSyDataNode,)

    def __init__(self):
        # Whether to add the "independent" clause
        # to the loop directive.
        self._independent = True
        self._sequential = False
        self._gang = False
        self._vector = False
        super().__init__()

    def __str__(self):
        return "Adds an 'OpenACC loop' directive to a loop"

    def _directive(self, children, collapse=None):
        '''
        Creates the ACCLoopDirective needed by this sub-class of
        transformation.

        :param children: list of child nodes of the new directive Node.
        :type children: list of :py:class:`psyclone.psyir.nodes.Node`
        :param int collapse: number of nested loops to collapse or None if
            no collapse attribute is required.
        '''
        directive = ACCLoopDirective(children=children,
                                     collapse=collapse,
                                     independent=self._independent,
                                     sequential=self._sequential,
                                     gang=self._gang,
                                     vector=self._vector)
        return directive

    def apply(self, node, options=None):
        '''
        Apply the ACCLoop transformation to the specified node. This node
        must be a Loop since this transformation corresponds to
        inserting a directive immediately before a loop, e.g.:

        .. code-block:: fortran

          !$ACC LOOP
          do ...
             ...
          end do

        At code-generation time (when
        :py:meth:`psyclone.psyir.nodes.ACCLoopDirective.gen_code` is called),
        this node must be within (i.e. a child of) a PARALLEL region.

        :param node: the supplied node to which we will apply the
            Loop transformation.
        :type node: :py:class:`psyclone.psyir.nodes.Loop`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]
        :param int options["collapse"]: number of nested loops to collapse.
        :param bool options["independent"]: whether to add the "independent"
            clause to the directive (not strictly necessary within
            PARALLEL regions).
        :param bool options["sequential"]: whether to add the "seq" clause to
            the directive.
        :param bool options["gang"]: whether to add the "gang" clause to the
            directive.
        :param bool options["vector"]: whether to add the "vector" clause to
            the directive.

        '''
        # Store sub-class specific options. These are used when
        # creating the directive (in the _directive() method).
        if not options:
            options = {}
        self._independent = options.get("independent", True)
        self._sequential = options.get("sequential", False)
        self._gang = options.get("gang", False)
        self._vector = options.get("vector", False)

        # Call the apply() method of the base class
        super().apply(node, options)
680 
681 
683 
class OMPParallelLoopTrans(OMPLoopTrans):

    ''' Adds an OpenMP PARALLEL DO directive to a loop.

    For example:

    >>> from psyclone.parse.algorithm import parse
    >>> from psyclone.psyGen import PSyFactory
    >>> ast, invokeInfo = parse("dynamo.F90")
    >>> psy = PSyFactory("dynamo0.3").create(invokeInfo)
    >>> schedule = psy.invokes.get('invoke_v3_kernel_type').schedule
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())
    >>>
    >>> from psyclone.transformations import OMPParallelLoopTrans
    >>> trans = OMPParallelLoopTrans()
    >>> trans.apply(schedule.children[0])
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())

    '''
    def __str__(self):
        return "Add an 'OpenMP PARALLEL DO' directive"

    def apply(self, node, options=None):
        ''' Apply an OMPParallelLoop Transformation to the supplied node
        (which must be a Loop). In the generated code this corresponds to
        wrapping the Loop with directives:

        .. code-block:: fortran

          !$OMP PARALLEL DO ...
          do ...
            ...
          end do
          !$OMP END PARALLEL DO

        :param node: the node (loop) to which to apply the transformation.
        :type node: :py:class:`psyclone.f2pygen.DoGen`
        :param options: a dictionary with options for transformations
            and validation.
        :type options: Optional[Dict[str, Any]]
        '''
        self.validate(node, options=options)

        # keep a reference to the node's original parent and its index as these
        # are required and will change when we change the node's location
        node_parent = node.parent
        node_position = node.position

        # add our OpenMP loop directive setting its parent to the node's
        # parent and its children to the node
        directive = OMPParallelDoDirective(children=[node.detach()],
                                           omp_schedule=self.omp_schedule)

        # add the OpenMP loop directive as a child of the node's parent
        node_parent.addchild(directive, index=node_position)
739 
740 
742 
class DynamoOMPParallelLoopTrans(OMPParallelLoopTrans):

    ''' Dynamo-specific OpenMP loop transformation. Adds Dynamo specific
    validity checks. Actual transformation is done by the
    :py:class:`base class <OMPParallelLoopTrans>`.

    :param str omp_directive: choose which OpenMP loop directive to use.
        Defaults to "do".
    :param str omp_schedule: the OpenMP schedule to use. Must be one of
        'runtime', 'static', 'dynamic', 'guided' or 'auto'. Defaults to
        'static'.

    '''
    def __init__(self, omp_directive="do", omp_schedule="static"):
        super().__init__(omp_directive=omp_directive,
                         omp_schedule=omp_schedule)

    def __str__(self):
        return "Add an OpenMP Parallel Do directive to a Dynamo loop"

    def validate(self, node, options=None):
        '''
        Perform LFRic-specific loop validity checks then call the `validate`
        method of the base class.

        :param node: the Node in the Schedule to check
        :type node: :py:class:`psyclone.psyir.nodes.Node`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]

        :raises TransformationError: if the supplied Node is not a LFRicLoop.
        :raises TransformationError: if the associated loop requires
            colouring.
        '''
        if not isinstance(node, LFRicLoop):
            raise TransformationError(
                f"Error in {self.name} transformation. The supplied node "
                f"must be a LFRicLoop but got '{type(node).__name__}'")

        # If the loop is not already coloured then check whether or not
        # it should be. If the field space is discontinuous (including
        # any_discontinuous_space) then we don't need to worry about
        # colouring.
        const = LFRicConstants()
        if node.field_space.orig_name not in const.VALID_DISCONTINUOUS_NAMES:
            if node.loop_type != 'colour' and node.has_inc_arg():
                raise TransformationError(
                    f"Error in {self.name} transformation. The kernel has an "
                    f"argument with INC access. Colouring is required.")
        # As this is a domain-specific loop, we don't perform general
        # dependence analysis because it is too conservative and doesn't
        # account for the special steps taken for such a loop at code-
        # generation time (e.g. the way we ensure variables are given the
        # correct sharing attributes).
        local_options = options.copy() if options else {}
        local_options["force"] = True
        super().validate(node, options=local_options)
798 
799 
801 
class GOceanOMPParallelLoopTrans(OMPParallelLoopTrans):

    '''GOcean specific OpenMP Do loop transformation. Adds GOcean
    specific validity checks (that supplied Loop is an inner or outer
    loop). Actual transformation is done by
    :py:class:`base class <OMPParallelLoopTrans>`.

    :param str omp_directive: choose which OpenMP loop directive to use.
        Defaults to "do".
    :param str omp_schedule: the OpenMP schedule to use. Must be one of
        'runtime', 'static', 'dynamic', 'guided' or 'auto'. Defaults to
        'static'.

    '''
    def __init__(self, omp_directive="do", omp_schedule="static"):
        super().__init__(omp_directive=omp_directive,
                         omp_schedule=omp_schedule)

    def __str__(self):
        return "Add an OpenMP Parallel Do directive to a GOcean loop"

    def apply(self, node, options=None):
        ''' Perform GOcean-specific loop validity checks then call
        :py:meth:`OMPParallelLoopTrans.apply`.

        :param node: a Loop node from an AST.
        :type node: :py:class:`psyclone.psyir.nodes.Loop`
        :param options: a dictionary with options for transformations
            and validation.
        :type options: Optional[Dict[str, Any]]

        :raises TransformationError: if the supplied node is not an inner or
            outer loop.

        '''
        self.validate(node, options=options)

        # Check we are either an inner or outer loop
        if node.loop_type not in ["inner", "outer"]:
            raise TransformationError(
                "Error in "+self.name+" transformation. The requested loop"
                " is not of type inner or outer.")

        OMPParallelLoopTrans.apply(self, node)
844 
845 
847 
848  ''' LFRic (Dynamo 0.3) specific orphan OpenMP loop transformation. Adds
849  Dynamo-specific validity checks.
850 
851  :param str omp_schedule: the OpenMP schedule to use. Must be one of \
852  'runtime', 'static', 'dynamic', 'guided' or 'auto'. Defaults to \
853  'static'.
854 
855  '''
856  def __init__(self, omp_schedule="static"):
857  super().__init__(omp_directive="do", omp_schedule=omp_schedule)
858 
859  def __str__(self):
860  return "Add an OpenMP DO directive to a Dynamo 0.3 loop"
861 
862  def validate(self, node, options=None):
863  ''' Perform LFRic (Dynamo 0.3) specific loop validity checks for the
864  OMPLoopTrans.
865 
866  :param node: the Node in the Schedule to check
867  :type node: :py:class:`psyclone.psyir.nodes.Node`
868  :param options: a dictionary with options for transformations \
869  and validation.
870  :type options: Optional[Dict[str, Any]]
871  :param bool options["reprod"]: \
872  indicating whether reproducible reductions should be used. \
873  By default the value from the config file will be used.
874 
875  :raises TransformationError: if an OMP loop transform would create \
876  incorrect code.
877 
878  '''
879  if not options:
880  options = {}
881 
882  # Since this function potentially modifies the user's option
883  # dictionary, create a copy:
884  options = options.copy()
885  # Make sure the default is set:
886  options["reprod"] = options.get("reprod",
887  Config.get().reproducible_reductions)
888 
889  # This transformation allows to parallelise loops with potential
890  # dependencies because we use cell colouring to guarantee that
891  # neighbours are not updated at the same time.
892  options["force"] = True
893  super().validate(node, options=options)
894 
895  # If the loop is not already coloured then check whether or not
896  # it should be
897  if node.loop_type != 'colour' and node.has_inc_arg():
898  raise TransformationError(
899  f"Error in {self.name} transformation. The kernel has an "
900  f"argument with INC access. Colouring is required.")
901 
902  def apply(self, node, options=None):
903  ''' Apply LFRic (Dynamo 0.3) specific OMPLoopTrans.
904 
905  :param node: the Node in the Schedule to check.
906  :type node: :py:class:`psyclone.psyir.nodes.Node`
907  :param options: a dictionary with options for transformations \
908  and validation.
909  :type options: Optional[Dict[str, Any]]
910  :param bool options["reprod"]: \
911  indicating whether reproducible reductions should be used. \
912  By default the value from the config file will be used.
913 
914  '''
915  if not options:
916  options = {}
917 
918  # Since this function potentially modifies the user's option
919  # dictionary, create a copy:
920  options = options.copy()
921  # Make sure the default is set:
922  options["reprod"] = options.get("reprod",
923  Config.get().reproducible_reductions)
924 
925  # This transformation allows to parallelise loops with potential
926  # dependencies because we use cell colouring to guarantee that
927  # neighbours are not updated at the same time.
928  options["force"] = True
929 
930  super().apply(node, options)
931 
932 
934 
935  ''' GOcean-specific orphan OpenMP loop transformation. Adds GOcean
936  specific validity checks (that the node is either an inner or outer
937  Loop).
938 
939  :param str omp_directive: choose which OpenMP loop directive to use. \
940  Defaults to "do".
941  :param str omp_schedule: the OpenMP schedule to use. Must be one of \
942  'runtime', 'static', 'dynamic', 'guided' or 'auto'. Defaults to \
943  'static'.
944 
945  '''
946  def __init__(self, omp_directive="do", omp_schedule="static"):
947  super().__init__(omp_directive=omp_directive,
948  omp_schedule=omp_schedule)
949 
950  def __str__(self):
951  return "Add the selected OpenMP loop directive to a GOcean loop"
952 
953  def validate(self, node, options=None):
954  '''
955  Checks that the supplied node is a valid target for parallelisation
956  using OMP directives.
957 
958  :param node: the candidate loop for parallelising using OMP Do.
959  :type node: :py:class:`psyclone.psyir.nodes.Loop`
960  :param options: a dictionary with options for transformations.
961  :type options: Optional[Dict[str, Any]]
962 
963  :raises TransformationError: if the loop_type of the supplied Loop is \
964  not "inner" or "outer".
965 
966  '''
967  super().validate(node, options=options)
968 
969  # Check we are either an inner or outer loop
970  if node.loop_type not in ["inner", "outer"]:
971  raise TransformationError("Error in "+self.namenamename+" transformation."
972  " The requested loop is not of type "
973  "inner or outer.")
974 
975 
977  '''
978  Apply a colouring transformation to a loop (in order to permit a
979  subsequent parallelisation over colours). For example:
980 
981  >>> invoke = ...
982  >>> schedule = invoke.schedule
983  >>>
984  >>> ctrans = ColourTrans()
985  >>>
986  >>> # Colour all of the loops
987  >>> for child in schedule.children:
988  >>> ctrans.apply(child)
989  >>>
990  >>> # Uncomment the following line to see a text view of the schedule
991  >>> # print(schedule.view())
992 
993  '''
994  def __str__(self):
995  return "Split a loop into colours"
996 
997  def apply(self, node, options=None):
998  '''
999  Converts the Loop represented by :py:obj:`node` into a
1000  nested loop where the outer loop is over colours and the inner
1001  loop is over cells of that colour.
1002 
1003  :param node: the loop to transform.
1004  :type node: :py:class:`psyclone.psyir.nodes.Loop`
1005  :param options: options for the transformation.
1006  :type options: Optional[Dict[str, Any]]
1007 
1008  '''
1009  self.validatevalidatevalidate(node, options=options)
1010 
1011  colours_loop = self._create_colours_loop_create_colours_loop(node)
1012 
1013  # Add this loop as a child of the original node's parent
1014  node.parent.addchild(colours_loop, index=node.position)
1015 
1016  # Add contents of node to colour loop.
1017  colours_loop.loop_body[0].loop_body.children.extend(
1018  node.loop_body.pop_all_children())
1019 
1020  # remove original loop
1021  node.detach()
1022 
1023  def _create_colours_loop(self, node):
1024  '''
1025  Creates a nested loop (colours, and cells of a given colour) to
1026  replace the supplied loop over cells.
1027 
1028  :param node: the loop for which to create a coloured version.
1029  :type node: :py:class:`psyclone.psyir.nodes.Loop`
1030 
1031  :returns: doubly-nested loop over colours and cells of a given colour.
1032  :rtype: :py:class:`psyclone.psyir.nodes.Loop`
1033 
1034  :raises NotImplementedError: this method must be overridden in an \
1035  API-specific sub-class.
1036  '''
1037  raise InternalError("_create_colours_loop() must be overridden in an "
1038  "API-specific sub-class.")
1039 
1040 
1042 
1043  '''Split a Dynamo 0.3 loop over cells into colours so that it can be
1044  parallelised. For example:
1045 
1046  >>> from psyclone.parse.algorithm import parse
1047  >>> from psyclone.psyGen import PSyFactory
1048  >>> import transformations
1049  >>> import os
1050  >>> import pytest
1051  >>>
1052  >>> TEST_API = "dynamo0.3"
1053  >>> _,info=parse(os.path.join(os.path.dirname(os.path.abspath(__file__)),
1054  >>> "tests", "test_files", "dynamo0p3",
1055  >>> "4.6_multikernel_invokes.f90"),
1056  >>> api=TEST_API)
1057  >>> psy = PSyFactory(TEST_API).create(info)
1058  >>> invoke = psy.invokes.get('invoke_0')
1059  >>> schedule = invoke.schedule
1060  >>>
1061  >>> ctrans = Dynamo0p3ColourTrans()
1062  >>> otrans = DynamoOMPParallelLoopTrans()
1063  >>>
1064  >>> # Colour all of the loops
1065  >>> for child in schedule.children:
1066  >>> ctrans.apply(child)
1067  >>>
1068  >>> # Then apply OpenMP to each of the colour loops
1069  >>> for child in schedule.children:
1070  >>> otrans.apply(child.children[0])
1071  >>>
1072  >>> # Uncomment the following line to see a text view of the schedule
1073  >>> # print(schedule.view())
1074 
1075  Colouring in the LFRic (Dynamo 0.3) API is subject to the following rules:
1076 
1077  * Only kernels which operate on 'CELL_COLUMN's and which increment a
1078  field on a continuous function space require colouring. Kernels that
1079  update a field on a discontinuous function space will cause this
1080  transformation to raise an exception. Kernels that only write to a field
1081  on a continuous function space also do not require colouring but are
1082  permitted.
1083  * A kernel may have at most one field with 'GH_INC' access.
1084  * A separate colour map will be required for each field that is coloured
1085  (if an invoke contains >1 kernel call).
1086 
1087  '''
1088  def __str__(self):
1089  return "Split a Dynamo 0.3 loop over cells into colours"
1090 
1091  def apply(self, node, options=None):
1092  '''Performs Dynamo0.3-specific error checking and then uses the parent
1093  class to convert the Loop represented by :py:obj:`node` into a
1094  nested loop where the outer loop is over colours and the inner
1095  loop is over cells of that colour.
1096 
1097  :param node: the loop to transform.
1098  :type node: :py:class:`psyclone.domain.lfric.LFRicLoop`
1099  :param options: a dictionary with options for transformations.\
1100  :type options: Optional[Dict[str, Any]]
1101 
1102  '''
1103  # check node is a loop
1104  super().validate(node, options=options)
1105 
1106  # Check we need colouring
1107  const = LFRicConstants()
1108  if node.field_space.orig_name in \
1109  const.VALID_DISCONTINUOUS_NAMES:
1110  raise TransformationError(
1111  "Error in DynamoColour transformation. Loops iterating over "
1112  "a discontinuous function space are not currently supported.")
1113 
1114  # Colouring is only necessary (and permitted) if the loop is
1115  # over cells. Since this is the default it is represented by
1116  # an empty string.
1117  if node.loop_type != "":
1118  raise TransformationError(
1119  f"Error in DynamoColour transformation. Only loops over cells "
1120  f"may be coloured but this loop is over {node.loop_type}")
1121 
1122  # Check whether we have a field that has INC access
1123  if not node.has_inc_arg():
1124  # TODO generate a warning here as we don't need to colour
1125  # a loop that does not update a field with INC access
1126  pass
1127 
1128  # Check that we're not attempting to colour a loop that is
1129  # already within an OpenMP region (because the loop over
1130  # colours *must* be sequential)
1131  if node.ancestor(OMPDirective):
1132  raise TransformationError("Cannot have a loop over colours "
1133  "within an OpenMP parallel region.")
1134 
1135  super().apply(node, options=options)
1136 
1137  def _create_colours_loop(self, node):
1138  '''
1139  Creates a nested loop (colours, and cells of a given colour) which
1140  can be used to replace the supplied loop over cells.
1141 
1142  :param node: the loop for which to create a coloured version.
1143  :type node: :py:class:`psyclone.psyir.nodes.Loop`
1144 
1145  :returns: doubly-nested loop over colours and cells of a given colour.
1146  :rtype: :py:class:`psyclone.psyir.nodes.Loop`
1147 
1148  '''
1149  # Create a colours loop. This loops over colours and must be run
1150  # sequentially.
1151  colours_loop = node.__class__(parent=node.parent, loop_type="colours")
1152  colours_loop.field_space = node.field_space
1153  colours_loop.iteration_space = node.iteration_space
1154  colours_loop.set_lower_bound("start")
1155  colours_loop.set_upper_bound("ncolours")
1156 
1157  # Create a colour loop. This loops over cells of a particular colour
1158  # and can be run in parallel.
1159  colour_loop = node.__class__(parent=colours_loop.loop_body,
1160  loop_type="colour")
1161  colour_loop.field_space = node.field_space
1162  colour_loop.field_name = node.field_name
1163  colour_loop.iteration_space = node.iteration_space
1164  colour_loop.set_lower_bound("start")
1165  colour_loop.kernel = node.kernel
1166 
1167  if node.upper_bound_name in LFRicConstants().HALO_ACCESS_LOOP_BOUNDS:
1168  # If the original loop went into the halo then this coloured loop
1169  # must also go into the halo.
1170  index = node.upper_bound_halo_depth
1171  colour_loop.set_upper_bound("colour_halo", index)
1172  else:
1173  # No halo access.
1174  colour_loop.set_upper_bound("ncolour")
1175 
1176  # Add this loop as a child of our loop over colours
1177  colours_loop.loop_body.addchild(colour_loop)
1178 
1179  return colours_loop
1180 
1181 
class ParallelRegionTrans(RegionTrans, metaclass=abc.ABCMeta):
    '''
    Base class for transformations that create a parallel region.

    '''
    # The types of node that must be excluded from the section of PSyIR
    # being transformed.
    excluded_node_types = (CodeBlock, Return, psyGen.HaloExchange)

    def __init__(self):
        # Holds the class instance or create call for the type of
        # parallel region to generate. (Fixed corrupted attribute name
        # '_directive_factory_directive_factory'.)
        self._directive_factory = None
        super().__init__()

    @abc.abstractmethod
    def __str__(self):
        pass  # pragma: no cover

    def validate(self, node_list, options=None):
        # pylint: disable=arguments-renamed
        '''
        Check that the supplied list of Nodes are eligible to be
        put inside a parallel region.

        :param list node_list: list of nodes to put into a parallel region
        :param options: a dictionary with options for transformations.\
        :type options: Optional[Dict[str, Any]]
        :param bool options["node-type-check"]: this flag controls whether \
            or not the type of the nodes enclosed in the region should be \
            tested to avoid using unsupported nodes inside a region.

        :raises TransformationError: if the supplied node is an \
            InvokeSchedule rather than being within an InvokeSchedule.
        :raises TransformationError: if the supplied nodes are not all \
            children of the same parent (siblings).

        '''
        if isinstance(node_list[0], InvokeSchedule):
            raise TransformationError(
                f"A {self.name} transformation cannot be applied to an "
                f"InvokeSchedule but only to one or more nodes from within an "
                f"InvokeSchedule.")

        node_parent = node_list[0].parent

        # All nodes must be siblings for a single region to enclose them.
        for child in node_list:
            if child.parent is not node_parent:
                raise TransformationError(
                    f"Error in {self.name} transformation: supplied nodes are "
                    f"not children of the same parent.")
        super().validate(node_list, options)

    def apply(self, target_nodes, options=None):
        # pylint: disable=arguments-renamed
        '''
        Apply this transformation to a subset of the nodes within a
        schedule - i.e. enclose the specified Loops in the
        schedule within a single parallel region.

        :param target_nodes: a single Node or a list of Nodes.
        :type target_nodes: (list of) :py:class:`psyclone.psyir.nodes.Node`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]
        :param bool options["node-type-check"]: this flag controls if the \
            type of the nodes enclosed in the region should be tested \
            to avoid using unsupported nodes inside a region.

        '''
        # Check whether we've been passed a list of nodes or just a
        # single node. If the latter then we create ourselves a
        # list containing just that node. (Fixed corrupted
        # 'get_node_listget_node_list' and 'validatevalidate...' calls.)
        node_list = self.get_node_list(target_nodes)
        self.validate(node_list, options)

        # Keep a reference to the parent of the nodes that are to be
        # enclosed within a parallel region. Also keep the index of
        # the first child to be enclosed as that will become the
        # position of the new !$omp parallel directive.
        node_parent = node_list[0].parent
        node_position = node_list[0].position

        # Create the parallel directive as a child of the
        # parent of the nodes being enclosed and with those nodes
        # as its children.
        # pylint: disable=not-callable
        directive = self._directive_factory(
            children=[node.detach() for node in node_list])

        # Add the region directive as a child of the parent
        # of the nodes being enclosed and at the original location
        # of the first of these nodes
        node_parent.addchild(directive, index=node_position)
1276 
1277 
1279  '''
1280  Create an OpenMP SINGLE region by inserting directives. The most
1281  likely use case for this transformation is to wrap around task-based
1282  transformations. The parent region for this should usually also be
1283  a OMPParallelTrans.
1284 
1285  :param bool nowait: whether to apply a nowait clause to this \
1286  transformation. The default value is False
1287 
1288  For example:
1289 
1290  >>> from psyclone.parse.algorithm import parse
1291  >>> from psyclone.psyGen import PSyFactory
1292  >>> api = "gocean1.0"
1293  >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
1294  >>> psy = PSyFactory(api).create(invokeInfo)
1295  >>>
1296  >>> from psyclone.transformations import OMPParallelTrans, OMPSingleTrans
1297  >>> singletrans = OMPSingleTrans()
1298  >>> paralleltrans = OMPParallelTrans()
1299  >>>
1300  >>> schedule = psy.invokes.get('invoke_0').schedule
1301  >>> # Uncomment the following line to see a text view of the schedule
1302  >>> # print(schedule.view())
1303  >>>
1304  >>> # Enclose all of these loops within a single OpenMP
1305  >>> # SINGLE region
1306  >>> singletrans.apply(schedule.children)
1307  >>> # Enclose all of these loops within a single OpenMP
1308  >>> # PARALLEL region
1309  >>> paralleltrans.apply(schedule.children)
1310  >>> # Uncomment the following line to see a text view of the schedule
1311  >>> # print(schedule.view())
1312 
1313  '''
    # The types of node that this transformation cannot enclose: a SINGLE
    # region may not contain OpenACC directives, halo exchanges or another
    # OpenMP serial/parallel construct.
    excluded_node_types = (CodeBlock, Return, ACCDirective,
                           psyGen.HaloExchange, OMPSerialDirective,
                           OMPParallelDirective)
1318 
1319  def __init__(self, nowait=False):
1320  super().__init__()
1321  # Set the type of directive that the base class will use
1322  self._directive_factory_directive_factory_directive_factory = self._directive_directive
1323  # Store whether this single directive has a barrier or not
1324  self._omp_nowait_omp_nowait = nowait
1325 
1326  def __str__(self):
1327  return "Insert an OpenMP Single region"
1328 
1329  @property
1330  def omp_nowait(self):
1331  ''' :returns: whether or not this Single region uses a nowait \
1332  clause to remove the end barrier.
1333  :rtype: bool
1334  '''
1335  return self._omp_nowait_omp_nowait
1336 
1337  @property
1338  def name(self):
1339  '''
1340  :returns: the name of this transformation.
1341  :rtype: str
1342  '''
1343  return "OMPSingleTrans"
1344 
1345  @omp_nowait.setter
1346  def omp_nowait(self, value):
1347  ''' Sets the nowait property that will be specified by
1348  this transformation. Checks that the value supplied in
1349  :py:obj:`value` is a bool
1350 
1351  :param bool value: whether this Single clause should have a \
1352  nowait applied.
1353 
1354  :raises TypeError: if the value parameter is not a bool.
1355 
1356  '''
1357  if not isinstance(value, bool):
1358  raise TypeError(f"Expected nowait to be a bool "
1359  f"but got a {type(value).__name__}")
1360  self._omp_nowait_omp_nowait = value
1361 
1362  def _directive(self, children):
1363  '''
1364  Creates the type of directive needed for this sub-class of
1365  transformation.
1366 
1367  :param children: list of Nodes that will be the children of \
1368  the created directive.
1369  :type children: list of :py:class:`psyclone.psyir.nodes.Node`
1370 
1371  :returns: The directive created for the OpenMP Single Directive
1372  :rtype: :py:class:`psyclone.psyGen.OMPSingleDirective`
1373 
1374  '''
1375  _directive = OMPSingleDirective(children=children,
1376  nowait=self.omp_nowaitomp_nowaitomp_nowaitomp_nowait)
1377  return _directive
1378 
1379  def apply(self, node_list, options=None):
1380  # pylint: disable=arguments-renamed
1381  '''Apply the OMPSingleTrans transformation to the specified node in a
1382  Schedule.
1383 
1384  At code-generation time this node must be within (i.e. a child of)
1385  an OpenMP PARALLEL region. Code generation happens when
1386  :py:meth:`OMPLoopDirective.gen_code` is called, or when the PSyIR
1387  tree is given to a backend.
1388 
1389  If the keyword "nowait" is specified in the options, it will cause a
1390  nowait clause to be added if it is set to True, otherwise no clause
1391  will be added.
1392 
1393  :param node_list: the supplied node or node list to which we will \
1394  apply the OMPSingleTrans transformation
1395  :type node_list: (a list of) :py:class:`psyclone.psyir.nodes.Node`
1396  :param options: a list with options for transformations \
1397  and validation.
1398  :type options: Optional[Dict[str, Any]]
1399  :param bool options["nowait"]:
1400  indicating whether or not to use a nowait clause on this \
1401  single region.
1402 
1403  '''
1404  if not options:
1405  options = {}
1406  if options.get("nowait") is not None:
1407  self.omp_nowaitomp_nowaitomp_nowaitomp_nowait = options.get("nowait")
1408 
1409  super().apply(node_list, options)
1410 
1411 
1413  '''
1414  Create an OpenMP MASTER region by inserting directives. The most
1415  likely use case for this transformation is to wrap around task-based
1416  transformations. Note that adding this directive requires a parent
1417  OpenMP parallel region (which can be inserted by OMPParallelTrans),
 otherwise it will produce an error at generation time.
1419 
1420  For example:
1421 
1422  >>> from psyclone.parse.algorithm import parse
1423  >>> from psyclone.psyGen import PSyFactory
1424  >>> api = "gocean1.0"
1425  >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
1426  >>> psy = PSyFactory(api).create(invokeInfo)
1427  >>>
1428  >>> from psyclone.transformations import OMPParallelTrans, OMPMasterTrans
1429  >>> mastertrans = OMPMasterTrans()
1430  >>> paralleltrans = OMPParallelTrans()
1431  >>>
1432  >>> schedule = psy.invokes.get('invoke_0').schedule
1433  >>> # Uncomment the following line to see a text view of the schedule
1434  >>> # print(schedule.view())
1435  >>>
1436  >>> # Enclose all of these loops within a single OpenMP
1437  >>> # MASTER region
1438  >>> mastertrans.apply(schedule.children)
1439  >>> # Enclose all of these loops within a single OpenMP
1440  >>> # PARALLEL region
1441  >>> paralleltrans.apply(schedule.children)
1442  >>> # Uncomment the following line to see a text view of the schedule
1443  >>> # print(schedule.view())
1444 
1445  '''
    # The types of node that this transformation cannot enclose: a MASTER
    # region may not contain OpenACC directives, halo exchanges or another
    # OpenMP serial/parallel construct.
    excluded_node_types = (CodeBlock, Return, ACCDirective,
                           psyGen.HaloExchange, OMPSerialDirective,
                           OMPParallelDirective)
1450 
1451  def __init__(self):
1452  super().__init__()
1453  # Set the type of directive that the base class will use
1454  self._directive_factory_directive_factory_directive_factory = OMPMasterDirective
1455 
1456  def __str__(self):
1457  return "Insert an OpenMP Master region"
1458 
1459  @property
1460  def name(self):
1461  '''
1462  :returns: the name of this transformation as a string.
1463  :rtype: str
1464  '''
1465  return "OMPMasterTrans"
1466 
1467 
1469  '''
1470  Create an OpenMP PARALLEL region by inserting directives. For
1471  example:
1472 
1473  >>> from psyclone.parse.algorithm import parse
1474  >>> from psyclone.parse.utils import ParseError
1475  >>> from psyclone.psyGen import PSyFactory
1476  >>> from psyclone.errors import GenerationError
1477  >>> api = "gocean1.0"
1478  >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
1479  >>> psy = PSyFactory(api).create(invokeInfo)
1480  >>>
1481  >>> from psyclone.psyGen import TransInfo
1482  >>> t = TransInfo()
1483  >>> ltrans = t.get_trans_name('GOceanOMPLoopTrans')
1484  >>> rtrans = t.get_trans_name('OMPParallelTrans')
1485  >>>
1486  >>> schedule = psy.invokes.get('invoke_0').schedule
1487  >>> # Uncomment the following line to see a text view of the schedule
1488  >>> # print(schedule.view())
1489  >>>
1490  >>> # Apply the OpenMP Loop transformation to *every* loop
1491  >>> # in the schedule
1492  >>> for child in schedule.children:
1493  >>> ltrans.apply(child)
1494  >>>
1495  >>> # Enclose all of these loops within a single OpenMP
1496  >>> # PARALLEL region
1497  >>> rtrans.apply(schedule.children)
1498  >>> # Uncomment the following line to see a text view of the schedule
1499  >>> # print(schedule.view())
1500 
1501  '''
1502  # The types of node that this transformation cannot enclose
1503  excluded_node_types = (CodeBlock, Return, ACCDirective,
1505 
1506  def __init__(self):
1507  super().__init__()
1508  # Set the type of directive that the base class will use
1509  self._directive_factory_directive_factory_directive_factory = OMPParallelDirective.create
1510 
1511  def __str__(self):
1512  return "Insert an OpenMP Parallel region"
1513 
1514  @property
1515  def name(self):
1516  '''
1517  :returns: the name of this transformation as a string.
1518  :rtype: str
1519  '''
1520  return "OMPParallelTrans"
1521 
1522  def validate(self, node_list, options=None):
1523  '''
1524  Perform OpenMP-specific validation checks.
1525 
1526  :param node_list: list of Nodes to put within parallel region.
1527  :type node_list: list of :py:class:`psyclone.psyir.nodes.Node`
1528  :param options: a dictionary with options for transformations.
1529  :type options: Optional[Dict[str, Any]]
1530  :param bool options["node-type-check"]: this flag controls if the \
1531  type of the nodes enclosed in the region should be tested \
1532  to avoid using unsupported nodes inside a region.
1533 
1534  :raises TransformationError: if the target Nodes are already within \
1535  some OMP parallel region.
1536  '''
1537  if node_list[0].ancestor(OMPDirective):
1538  raise TransformationError("Error in OMPParallel transformation:" +
1539  " cannot create an OpenMP PARALLEL " +
1540  "region within another OpenMP region.")
1541 
1542  # Now call the general validation checks
1543  super().validate(node_list, options)
1544 
1545 
1547  '''
1548  Create an OpenACC parallel region by inserting an 'acc parallel'
1549  directive.
1550 
1551  >>> from psyclone.psyGen import TransInfo
1552  >>> from psyclone.psyir.frontend.fortran import FortranReader
1553  >>> from psyclone.psyir.backend.fortran import FortranWriter
1554  >>> from psyclone.psyir.nodes import Loop
1555  >>> psyir = FortranReader().psyir_from_source("""
1556  ... program do_loop
1557  ... real, dimension(10) :: A
1558  ... integer i
1559  ... do i = 1, 10
1560  ... A(i) = i
1561  ... end do
1562  ... end program do_loop
1563  ... """)
1564  >>> ptrans = TransInfo().get_trans_name('ACCParallelTrans')
1565  >>>
1566  >>> # Enclose the loop within a OpenACC PARALLEL region
1567  >>> ptrans.apply(psyir.walk(Loop))
1568  >>> print(FortranWriter()(psyir))
1569  program do_loop
1570  real, dimension(10) :: a
1571  integer :: i
1572  <BLANKLINE>
1573  !$acc parallel default(present)
1574  do i = 1, 10, 1
1575  a(i) = i
1576  enddo
1577  !$acc end parallel
1578  <BLANKLINE>
1579  end program do_loop
1580  <BLANKLINE>
1581 
1582  '''
1583  excluded_node_types = (CodeBlock, Return, PSyDataNode,
1584  ACCDataDirective, ACCEnterDataDirective,
1586 
1587  def __init__(self, default_present=True):
1588  super().__init__()
1589  if not isinstance(default_present, bool):
1590  raise TypeError(
1591  f"The provided 'default_present' argument must be a "
1592  f"boolean, but found '{default_present}'."
1593  )
1594  self._default_present_default_present = default_present
1595 
1596  def __str__(self):
1597  return "Insert an OpenACC Parallel region"
1598 
1599  def validate(self, node_list, options=None):
1600  '''
1601  Validate this transformation.
1602 
1603  :param node_list: a single Node or a list of Nodes.
1604  :type node_list: :py:class:`psyclone.psyir.nodes.Node` |
1605  List[:py:class:`psyclone.psyir.nodes.Node`]
1606  :param options: a dictionary with options for transformations.
1607  :type options: Optional[Dict[str, Any]]
1608  :param bool options["node-type-check"]: this flag controls if the
1609  type of the nodes enclosed in the region should be tested to
1610  avoid using unsupported nodes inside a region.
1611  :param bool options["default_present"]: this flag controls if the
1612  inserted directive should include the default_present clause.
1613 
1614  '''
1615  super().validate(node_list, options)
1616  if options is not None and "default_present" in options:
1617  if not isinstance(options["default_present"], bool):
1618  raise TransformationError(
1619  f"The provided 'default_present' option must be a "
1620  f"boolean, but found '{options['default_present']}'."
1621  )
1622 
1623  def apply(self, target_nodes, options=None):
1624  '''
1625  Encapsulate given nodes with the ACCParallelDirective.
1626 
1627  :param target_nodes: a single Node or a list of Nodes.
1628  :type target_nodes: :py:class:`psyclone.psyir.nodes.Node` |
1629  List[:py:class:`psyclone.psyir.nodes.Node`]
1630  :param options: a dictionary with options for transformations.
1631  :type options: Optional[Dict[str, Any]]
1632  :param bool options["node-type-check"]: this flag controls if the
1633  type of the nodes enclosed in the region should be tested to
1634  avoid using unsupported nodes inside a region.
1635  :param bool options["default_present"]: this flag controls if the
1636  inserted directive should include the default_present clause.
1637 
1638  '''
1639  if not options:
1640  options = {}
1641  # Check whether we've been passed a list of nodes or just a
1642  # single node. If the latter then we create ourselves a
1643  # list containing just that node.
1644  node_list = self.get_node_listget_node_list(target_nodes)
1645  self.validatevalidatevalidatevalidatevalidate(node_list, options)
1646 
1647  # Keep a reference to the parent of the nodes that are to be
1648  # enclosed within a parallel region. Also keep the index of
1649  # the first child to be enclosed as that will become the
1650  # position of the new !$omp parallel directive.
1651  node_parent = node_list[0].parent
1652  node_position = node_list[0].position
1653 
1654  # Create the parallel directive
1655  directive = ACCParallelDirective(
1656  children=[node.detach() for node in node_list])
1657  directive.default_present = options.get("default_present",
1658  self._default_present_default_present)
1659 
1660  # Add the region directive as a child of the parent
1661  # of the nodes being enclosed and at the original location
1662  # of the first of these nodes
1663  node_parent.addchild(directive, index=node_position)
1664 
1665 
1667  '''Provides a transformation to move a node in the tree. For
1668  example:
1669 
1670  >>> from psyclone.parse.algorithm import parse
1671  >>> from psyclone.psyGen import PSyFactory
1672  >>> ast,invokeInfo=parse("dynamo.F90")
1673  >>> psy=PSyFactory("dynamo0.3").create(invokeInfo)
1674  >>> schedule=psy.invokes.get('invoke_v3_kernel_type').schedule
1675  >>> # Uncomment the following line to see a text view of the schedule
1676  >>> # print(schedule.view())
1677  >>>
1678  >>> from psyclone.transformations import MoveTrans
1679  >>> trans=MoveTrans()
1680  >>> trans.apply(schedule.children[0], schedule.children[2],
1681  ...             options = {"position":"after"})
1682  >>> # Uncomment the following line to see a text view of the schedule
1683  >>> # print(schedule.view())
1684 
1685  Nodes may only be moved to a new location with the same parent
1686  and must not break any dependencies otherwise an exception is
1687  raised.'''
1688 
def __str__(self):
    '''Return a short, human-readable description of this transformation.'''
    description = "Move a node to a different location"
    return description
1691 
@property
def name(self):
    '''
    :returns: the name of this transformation.
    :rtype: str
    '''
    return "Move"
1696 
def validate(self, node, location, options=None):
    # pylint: disable=arguments-differ
    '''Check that the proposed move of `node` is valid.

    :param node: the node to be moved.
    :type node: :py:class:`psyclone.psyir.nodes.Node`
    :param location: node before or after which the given node should
        be moved.
    :type location: :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param str options["position"]: either 'before' or 'after'.

    :raises TransformationError: if the given node is not an instance
        of :py:class:`psyclone.psyir.nodes.Node`
    :raises TransformationError: if the location is not valid.
    '''
    # The object being moved must be a PSyIR Node.
    if not isinstance(node, Node):
        raise TransformationError(
            "In the Move transformation apply method the first argument "
            "is not a Node")

    # The move must not break any data dependencies. This call also
    # checks the validity of the location and position arguments.
    opts = options or {}
    where = opts.get("position", "before")
    if not node.is_valid_location(location, position=where):
        raise TransformationError(
            "In the Move transformation apply method, data dependencies "
            "forbid the move to the new location")
1730 
def apply(self, node, location, options=None):
    '''Move the node represented by :py:obj:`node` before location
    :py:obj:`location` (which is also a node) by default and after
    if the optional `position` argument is set to 'after'.

    :param node: the node to be moved.
    :type node: :py:class:`psyclone.psyir.nodes.Node`
    :param location: node before or after which the given node should
        be moved.
    :type location: :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param str options["position"]: either 'before' or 'after'.

    :raises TransformationError: if the given node is not an instance
        of :py:class:`psyclone.psyir.nodes.Node`
    :raises TransformationError: if the location is not valid.

    '''
    # pylint:disable=arguments-differ

    # Fix scraping corruption: call the (single) validate method.
    self.validate(node, location, options)

    if not options:
        options = {}
    position = options.get("position", "before")

    # Detach the node from its current position in the tree...
    parent = node.parent
    my_node = parent.children.pop(node.position)

    # ...and re-insert it before or after the target location.
    location_index = location.position
    if position == "before":
        location.parent.children.insert(location_index, my_node)
    else:
        location.parent.children.insert(location_index+1, my_node)
1767 
1768 
1770  '''This transformation allows the user to modify a loop's bounds so
1771  that redundant computation will be performed. Redundant
1772  computation can result in halo exchanges being modified, new halo
1773  exchanges being added or existing halo exchanges being removed.
1774 
1775  * This transformation should be performed before any
1776  parallelisation transformations (e.g. for OpenMP) to the loop in
1777  question and will raise an exception if this is not the case.
1778 
1779  * This transformation can not be applied to a loop containing a
1780  reduction and will again raise an exception if this is the case.
1781 
1782  * This transformation can only be used to add redundant
1783  computation to a loop, not to remove it.
1784 
1785  * This transformation allows a loop that is already performing
1786  redundant computation to be modified, but only if the depth is
1787  increased.
1788 
1789  '''
def __str__(self):
    '''Return a brief description of this transformation.'''
    text = "Change iteration space to perform redundant computation"
    return text
1792 
def validate(self, node, options=None):
    '''Perform various checks to ensure that it is valid to apply the
    RedundantComputation transformation to the supplied node

    :param node: the supplied node on which we are performing
        validity checks
    :type node: :py:class:`psyclone.psyir.nodes.Node`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param int options["depth"]: the depth of the stencil if the value
        is provided and None if not.

    :raises TransformationError: if the parent of the loop is a
        :py:class:`psyclone.psyir.nodes.Directive`.
    :raises TransformationError: if the parent of the loop is not a
        :py:class:`psyclone.psyir.nodes.Loop` or a
        :py:class:`psyclone.psyGen.DynInvokeSchedule`.
    :raises TransformationError: if the parent of the loop is a
        :py:class:`psyclone.psyir.nodes.Loop` but the original loop does
        not iterate over 'colour'.
    :raises TransformationError: if the parent of the loop is a
        :py:class:`psyclone.psyir.nodes.Loop` but the parent does not
        iterate over 'colours'.
    :raises TransformationError: if the parent of the loop is a
        :py:class:`psyclone.psyir.nodes.Loop` but the parent's parent is
        not a :py:class:`psyclone.psyGen.DynInvokeSchedule`.
    :raises TransformationError: if this transformation is applied
        when distributed memory is not switched on.
    :raises TransformationError: if the loop does not iterate over
        cells, dofs or colour.
    :raises TransformationError: if the transformation is setting the
        loop to the maximum halo depth but the loop already computes
        to the maximum halo depth.
    :raises TransformationError: if the transformation is setting the
        loop to the maximum halo depth but the loop contains a stencil
        access (as this would result in the field being accessed
        beyond the halo depth).
    :raises TransformationError: if the supplied depth value is not an
        integer.
    :raises TransformationError: if the supplied depth value is less
        than 1.
    :raises TransformationError: if the supplied depth value is not
        greater than 1 when a continuous loop is modified as this is
        the minimum valid value.
    :raises TransformationError: if the supplied depth value is not
        greater than the existing depth value, as we should not need
        to undo existing transformations.
    :raises TransformationError: if a depth value has been supplied
        but the loop has already been set to the maximum halo depth.

    '''
    # pylint: disable=too-many-branches
    # Check node is a loop.
    super().validate(node, options=options)

    # Check loop's parent is the InvokeSchedule, or that it is nested
    # in a colours loop and perform other colour(s) loop checks,
    # otherwise halo exchange placement might fail. The only
    # current example where the placement would fail is when
    # directives have already been added. This could be fixed but
    # it actually makes sense to require redundant computation
    # transformations to be applied before adding directives so it
    # is not particularly important.
    dir_node = node.ancestor(Directive)
    if dir_node:
        # NOTE(review): message grammar fixed ("is sits" -> "sits").
        raise TransformationError(
            f"In the Dynamo0p3RedundantComputation transformation apply "
            f"method the supplied loop sits beneath a directive of "
            f"type {type(dir_node)}. Redundant computation must be applied"
            f" before directives are added.")
    if not (isinstance(node.parent, DynInvokeSchedule) or
            isinstance(node.parent.parent, Loop)):
        raise TransformationError(
            f"In the Dynamo0p3RedundantComputation transformation "
            f"apply method the parent of the supplied loop must be the "
            f"DynInvokeSchedule, or a Loop, but found {type(node.parent)}")
    if isinstance(node.parent.parent, Loop):
        if node.loop_type != "colour":
            raise TransformationError(
                f"In the Dynamo0p3RedundantComputation transformation "
                f"apply method, if the parent of the supplied Loop is "
                f"also a Loop then the supplied Loop must iterate over "
                f"'colour', but found '{node.loop_type}'")
        if node.parent.parent.loop_type != "colours":
            raise TransformationError(
                f"In the Dynamo0p3RedundantComputation transformation "
                f"apply method, if the parent of the supplied Loop is "
                f"also a Loop then the parent must iterate over "
                f"'colours', but found '{node.parent.parent.loop_type}'")
        if not isinstance(node.parent.parent.parent, DynInvokeSchedule):
            # Report the node actually tested (previously this message
            # misleadingly reported type(node.parent)).
            raise TransformationError(
                f"In the Dynamo0p3RedundantComputation transformation "
                f"apply method, if the parent of the supplied Loop is "
                f"also a Loop then the parent's parent must be the "
                f"DynInvokeSchedule, but found "
                f"{type(node.parent.parent.parent)}")
    if not Config.get().distributed_memory:
        raise TransformationError(
            "In the Dynamo0p3RedundantComputation transformation apply "
            "method distributed memory must be switched on")

    # Loop must iterate over cell-column, dof or colour. Note, an
    # empty loop_type iterates over cell-columns.
    if node.loop_type not in ["", "dof", "colour"]:
        raise TransformationError(
            f"In the Dynamo0p3RedundantComputation transformation apply "
            f"method the loop type must be one of '' (cell-columns), 'dof'"
            f" or 'colour', but found '{node.loop_type}'")

    # We don't currently support the application of transformations to
    # loops containing inter-grid kernels.
    check_intergrid(node)
    const = LFRicConstants()

    if not options:
        options = {}
    depth = options.get("depth")
    if depth is None:
        # Maximum halo depth requested: reject if the loop is already
        # at maximum depth or contains any stencil access.
        if node.upper_bound_name in const.HALO_ACCESS_LOOP_BOUNDS:
            if not node.upper_bound_halo_depth:
                raise TransformationError(
                    "In the Dynamo0p3RedundantComputation transformation "
                    "apply method the loop is already set to the maximum "
                    "halo depth so this transformation does nothing")
        for call in node.kernels():
            for arg in call.arguments.args:
                if arg.stencil:
                    raise TransformationError(
                        f"In the Dynamo0p3RedundantComputation "
                        f"transformation apply method the loop "
                        f"contains field '{arg.name}' with a stencil "
                        f"access in kernel '{call.name}', so it is "
                        f"invalid to set redundant computation to "
                        f"maximum depth")
    else:
        # A fixed depth was requested: it must be a positive integer
        # that increases any existing (fixed) redundant-compute depth.
        if not isinstance(depth, int):
            raise TransformationError(
                f"In the Dynamo0p3RedundantComputation transformation "
                f"apply method the supplied depth should be an integer but"
                f" found type '{type(depth)}'")
        if depth < 1:
            raise TransformationError(
                "In the Dynamo0p3RedundantComputation transformation "
                "apply method the supplied depth is less than 1")

        if node.upper_bound_name in const.HALO_ACCESS_LOOP_BOUNDS:
            if node.upper_bound_halo_depth:
                if node.upper_bound_halo_depth >= depth:
                    raise TransformationError(
                        f"In the Dynamo0p3RedundantComputation "
                        f"transformation apply method the supplied depth "
                        f"({depth}) must be greater than the existing halo"
                        f" depth ({node.upper_bound_halo_depth})")
            else:
                raise TransformationError(
                    "In the Dynamo0p3RedundantComputation transformation "
                    "apply method the loop is already set to the maximum "
                    "halo depth so can't be set to a fixed value")
1950 
def apply(self, loop, options=None):
    # pylint:disable=arguments-renamed
    '''Apply the redundant computation transformation to the loop
    :py:obj:`loop`. This transformation can be applied to loops iterating
    over 'cells' or 'dofs'. If :py:obj:`depth` is set to a value then the
    value will be the depth of the field's halo over which redundant
    computation will be performed. If :py:obj:`depth` is not set to a
    value then redundant computation will be performed to the full depth
    of the field's halo.

    :param loop: the loop that we are transforming.
    :type loop: :py:class:`psyclone.psyGen.LFRicLoop`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param int options["depth"]: the depth of the stencil. Defaults
        to None.

    '''
    # Fix scraping corruption: call the (single) validate method.
    self.validate(loop, options=options)
    if not options:
        options = {}
    depth = options.get("depth")

    if loop.loop_type == "":
        # Loop is over cells
        loop.set_upper_bound("cell_halo", depth)
    elif loop.loop_type == "colour":
        # Loop is over cells of a single colour
        loop.set_upper_bound("colour_halo", depth)
    elif loop.loop_type == "dof":
        loop.set_upper_bound("dof_halo", depth)
    else:
        # validate() should make this unreachable.
        raise TransformationError(
            f"Unsupported loop_type '{loop.loop_type}' found in "
            f"Dynamo0p3Redundant ComputationTrans.apply()")
    # Add/remove halo exchanges as required due to the redundant
    # computation
    loop.update_halo_exchanges()
1989 
1990 
1992  '''Splits a synchronous halo exchange into a halo exchange start and
1993  halo exchange end. For example:
1994 
1995  >>> from psyclone.parse.algorithm import parse
1996  >>> from psyclone.psyGen import PSyFactory
1997  >>> api = "dynamo0.3"
1998  >>> ast, invokeInfo = parse("file.f90", api=api)
1999  >>> psy=PSyFactory(api).create(invokeInfo)
2000  >>> schedule = psy.invokes.get('invoke_0').schedule
2001  >>> # Uncomment the following line to see a text view of the schedule
2002  >>> # print(schedule.view())
2003  >>>
2004  >>> from psyclone.transformations import Dynamo0p3AsyncHaloExchangeTrans
2005  >>> trans = Dynamo0p3AsyncHaloExchangeTrans()
2006  >>> trans.apply(schedule.children[0])
2007  >>> # Uncomment the following line to see a text view of the schedule
2008  >>> # print(schedule.view())
2009 
2010  '''
2011 
def __str__(self):
    '''Return a brief description of this transformation.'''
    return ("Changes a synchronous halo exchange into an "
            "asynchronous one.")
2014 
@property
def name(self):
    '''
    :returns: the name of this transformation as a string.
    :rtype: str
    '''
    transformation_name = "Dynamo0p3AsyncHaloExchangeTrans"
    return transformation_name
2022 
def apply(self, node, options=None):
    '''Transforms a synchronous halo exchange, represented by a
    HaloExchange node, into an asynchronous halo exchange,
    represented by HaloExchangeStart and HaloExchangeEnd nodes.

    :param node: a synchronous haloexchange node.
    :type node: :py:obj:`psyclone.psygen.HaloExchange`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    '''
    self.validate(node, options)

    # Add asynchronous start and end halo exchanges and initialise
    # them using information from the existing synchronous halo
    # exchange. (The constructor names were dropped by the scrape;
    # restored from the classes checked in validate() below.)
    # pylint: disable=protected-access
    node.parent.addchild(
        LFRicHaloExchangeStart(
            node.field, check_dirty=node._check_dirty,
            vector_index=node.vector_index, parent=node.parent),
        index=node.position)
    node.parent.addchild(
        LFRicHaloExchangeEnd(
            node.field, check_dirty=node._check_dirty,
            vector_index=node.vector_index, parent=node.parent),
        index=node.position)

    # Remove the existing synchronous halo exchange.
    node.detach()
2053 
def validate(self, node, options):
    # pylint: disable=signature-differs
    '''Internal method to check whether the node is valid for this
    transformation.

    :param node: a synchronous Halo Exchange node
    :type node: :py:obj:`psyclone.psygen.HaloExchange`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]

    :raises TransformationError: if the node argument is not a
        HaloExchange (or subclass thereof)

    '''
    # The node must be a halo exchange, and must not already be one of
    # the asynchronous (start/end) variants.
    is_halo_exchange = isinstance(node, psyGen.HaloExchange)
    is_already_async = isinstance(
        node, (LFRicHaloExchangeStart, LFRicHaloExchangeEnd))
    if not is_halo_exchange or is_already_async:
        raise TransformationError(
            f"Error in Dynamo0p3AsyncHaloExchange transformation. Supplied"
            f" node must be a synchronous halo exchange but found "
            f"'{type(node)}'.")
2074 
2075 
2077  '''Modifies a kernel so that the number of dofs, number of layers and
2078  number of quadrature points are fixed in the kernel rather than
2079  being passed in by argument.
2080 
2081  >>> from psyclone.parse.algorithm import parse
2082  >>> from psyclone.psyGen import PSyFactory
2083  >>> api = "dynamo0.3"
2084  >>> ast, invokeInfo = parse("file.f90", api=api)
2085  >>> psy=PSyFactory(api).create(invokeInfo)
2086  >>> schedule = psy.invokes.get('invoke_0').schedule
2087  >>> # Uncomment the following line to see a text view of the schedule
2088  >>> # print(schedule.view())
2089  >>>
2090  >>> from psyclone.transformations import Dynamo0p3KernelConstTrans
2091  >>> trans = Dynamo0p3KernelConstTrans()
2092  >>> for kernel in schedule.coded_kernels():
2093  >>> trans.apply(kernel, options={"number_of_layers": 150})
2094  >>> kernel_schedule = kernel.get_kernel_schedule()
2095  >>> # Uncomment the following line to see a text view of the
2096  >>> # symbol table
2097  >>> # print(kernel_schedule.symbol_table.view())
2098 
2099  '''
2100 
# Number of dofs per 3D cell for the supported function spaces on a
# quadrilateral element, each expressed as a function of the element
# order n. Formulas kindly provided by Tom Melvin and Thomas Gibson.
# See the Qr table at http://femtable.org/background.html for computed
# values of w0, w1, w2 and w3 up to order 7.
# Note: w2*trace spaces have dofs only on cell faces and no volume dofs.
# As there is currently no dedicated structure for face dofs in kernel
# constants, w2*trace dofs are included here. Their formulas require the
# number of reference-element faces in the horizontal (4) for w2htrace
# space, in the vertical (2) for w2vtrace space and all faces (6) for
# w2trace space.
space_to_dofs = {
    "w3": (lambda n: (n+1)**3),
    "w2": (lambda n: 3*(n+2)*(n+1)**2),
    "w1": (lambda n: 3*(n+2)**2*(n+1)),
    "w0": (lambda n: (n+2)**3),
    "wtheta": (lambda n: (n+2)*(n+1)**2),
    "w2h": (lambda n: 2*(n+2)*(n+1)**2),
    "w2v": (lambda n: (n+2)*(n+1)**2),
    "w2broken": (lambda n: 3*(n+1)**2*(n+2)),
    "wchi": (lambda n: (n+1)**3),
    "w2trace": (lambda n: 6*(n+1)**2),
    "w2htrace": (lambda n: 4*(n+1)**2),
    "w2vtrace": (lambda n: 2*(n+1)**2),
}
2124 
def __str__(self):
    '''Return a human-readable description of this transformation.'''
    description = ("Makes the number of degrees of freedom, the number of "
                   "quadrature points and the number of layers constant in "
                   "a Kernel.")
    return description
2129 
@property
def name(self):
    '''
    :returns: the name of this transformation as a string.
    :rtype: str
    '''
    transformation_name = "Dynamo0p3KernelConstTrans"
    return transformation_name
2137 
def apply(self, node, options=None):
    # pylint: disable=too-many-statements, too-many-locals
    '''Transforms a kernel so that the values for the number of degrees of
    freedom (if a valid value for the element_order arg is
    provided), the number of quadrature points (if the quadrature
    arg is set to True) and the number of layers (if a valid value
    for the number_of_layers arg is provided) are constant in a
    kernel rather than being passed in by argument.

    The "cellshape", "element_order" and "number_of_layers"
    arguments are provided to mirror the namelist values that are
    input into an LFRic model when it is run.

    Quadrature support is currently limited to XYoZ in this
    transformation. In the case of XYoZ the number of quadrature
    points (for horizontal and vertical) are set to the
    element_order + 3 in the LFRic infrastructure so their value
    is derived.

    :param node: a kernel node.
    :type node: :py:obj:`psyclone.domain.lfric.LFRicKern`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param str options["cellshape"]: the shape of the cells. This is
        provided as it helps determine the number of dofs a field has
        for a particular function space. Currently only "quadrilateral"
        is supported which is also the default value.
    :param int options["element_order"]: the order of the cell. In
        combination with cellshape, this determines the number of
        dofs a field has for a particular function space. If it is set
        to None (the default) then the dofs values are not set as
        constants in the kernel, otherwise they are.
    :param int options["number_of_layers"]: the number of vertical
        layers in the LFRic model mesh used for this particular run. If
        this is set to None (the default) then the nlayers value is not
        set as a constant in the kernel, otherwise it is.
    :param bool options["quadrature"]: whether the number of quadrature
        points values are set as constants in the kernel (True) or not
        (False). The default is False.

    '''
    # --------------------------------------------------------------------
    def make_constant(symbol_table, arg_position, value,
                      function_space=None):
        '''Utility function that modifies the argument at position
        'arg_position' into a compile-time constant with value
        'value'.

        :param symbol_table: the symbol table for the kernel holding
            the argument that is going to be modified.
        :type symbol_table: :py:class:`psyclone.psyir.symbols.SymbolTable`
        :param int arg_position: the argument's position in the
            argument list.
        :param value: the constant value that this argument is going to
            be given. Its type depends on the type of the argument.
        :type value: int, str or bool
        :param str function_space: the name of the function space if
            there is a function space associated with this argument.
            Defaults to None.

        '''
        arg_index = arg_position - 1
        try:
            symbol = symbol_table.argument_list[arg_index]
        except IndexError as err:
            raise TransformationError(
                f"The argument index '{arg_index}' is greater than the "
                f"number of arguments "
                f"'{len(symbol_table.argument_list)}'.") from err
        # Perform some basic checks on the argument to make sure
        # it is the expected type.
        if not isinstance(symbol.datatype, ScalarType):
            raise TransformationError(
                f"Expected entry to be a scalar argument but found "
                f"'{type(symbol.datatype).__name__}'.")
        if symbol.datatype.intrinsic != ScalarType.Intrinsic.INTEGER:
            raise TransformationError(
                f"Expected entry to be a scalar integer argument "
                f"but found '{symbol.datatype}'.")
        if symbol.is_constant:
            raise TransformationError(
                "Expected entry to be a scalar integer argument "
                "but found a constant.")

        # Create a new symbol with a known constant value then swap
        # it with the argument. The argument then becomes xxx_dummy
        # and is unused within the kernel body.
        orig_name = symbol.name
        new_name = symbol_table.next_available_name(f"{orig_name}_dummy")
        local_symbol = DataSymbol(new_name, INTEGER_TYPE,
                                  is_constant=True, initial_value=value)
        symbol_table.add(local_symbol)
        symbol_table.swap_symbol_properties(symbol, local_symbol)

        if function_space:
            print(f"    Modified {orig_name}, arg position {arg_position},"
                  f" function space {function_space}, value {value}.")
        else:
            print(f"    Modified {orig_name}, arg position {arg_position},"
                  f" value {value}.")
    # --------------------------------------------------------------------

    # Fix scraping corruption: call the (single) validate method.
    self.validate(node, options)

    if not options:
        options = {}
    number_of_layers = options.get("number_of_layers", None)
    quadrature = options.get("quadrature", False)
    element_order = options.get("element_order", None)
    kernel = node

    arg_list_info = KernCallArgList(kernel)
    arg_list_info.generate()
    try:
        kernel_schedule = kernel.get_kernel_schedule()
    except NotImplementedError as excinfo:
        raise TransformationError(
            f"Failed to parse kernel '{kernel.name}'. Error reported was "
            f"'{excinfo}'.") from excinfo

    symbol_table = kernel_schedule.symbol_table
    if number_of_layers:
        make_constant(symbol_table, arg_list_info.nlayers_positions[0],
                      number_of_layers)

    if quadrature and arg_list_info.nqp_positions:
        # TODO #705 - support the transformation of kernels requiring
        # other quadrature types (face/edge, multiple).
        if kernel.eval_shapes == ["gh_quadrature_xyoz"]:
            make_constant(symbol_table,
                          arg_list_info.nqp_positions[0]["horizontal"],
                          element_order+3)
            make_constant(symbol_table,
                          arg_list_info.nqp_positions[0]["vertical"],
                          element_order+3)
        else:
            raise TransformationError(
                f"Error in Dynamo0p3KernelConstTrans transformation. "
                f"Support is currently limited to 'xyoz' quadrature but "
                f"found {kernel.eval_shapes}.")

    const = LFRicConstants()
    if element_order is not None:
        # Modify the symbol table for degrees of freedom here.
        for info in arg_list_info.ndf_positions:
            if (info.function_space.lower() in
                    (const.VALID_ANY_SPACE_NAMES +
                     const.VALID_ANY_DISCONTINUOUS_SPACE_NAMES +
                     ["any_w2"])):
                # Skip any_space_*, any_discontinuous_space_* and any_w2.
                print(f"    Skipped dofs, arg position {info.position}, "
                      f"function space {info.function_space}")
            else:
                try:
                    ndofs = Dynamo0p3KernelConstTrans.space_to_dofs[
                        info.function_space](element_order)
                except KeyError as err:
                    raise InternalError(
                        f"Error in Dynamo0p3KernelConstTrans "
                        f"transformation. Unsupported function space "
                        f"'{info.function_space}' found. Expecting one of "
                        f"{Dynamo0p3KernelConstTrans.space_to_dofs.keys()}."
                    ) from err
                make_constant(symbol_table, info.position, ndofs,
                              function_space=info.function_space)

    # Flag that the kernel has been modified.
    kernel.modified = True
2307 
def validate(self, node, options=None):
    '''This method checks whether the input arguments are valid for
    this transformation.

    :param node: a dynamo 0.3 kernel node.
    :type node: :py:obj:`psyclone.domain.lfric.LFRicKern`
    :param options: a dictionary with options for transformations.
    :type options: Optional[Dict[str, Any]]
    :param str options["cellshape"]: the shape of the elements/cells.
    :param int options["element_order"]: the order of the elements/cells.
    :param int options["number_of_layers"]: the number of layers to use.
    :param bool options["quadrature"]: whether quadrature dimension sizes
        should or shouldn't be set as constants in a kernel.

    :raises TransformationError: if the node argument is not a
        dynamo 0.3 kernel, the cellshape argument is not set to
        "quadrilateral", the element_order argument is not a 0 or a
        positive integer, the number of layers argument is not a
        positive integer, the quadrature argument is not a boolean,
        neither element order nor number of layers arguments are set
        (as the transformation would then do nothing), or the
        quadrature argument is True but the element order is not
        provided (as the former needs the latter).

    '''
    # This transformation only makes sense for LFRic (dynamo) kernels.
    if not isinstance(node, LFRicKern):
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. Supplied "
            f"node must be a dynamo kernel but found '{type(node)}'.")

    opts = options or {}
    cellshape = opts.get("cellshape", "quadrilateral")
    element_order = opts.get("element_order", None)
    number_of_layers = opts.get("number_of_layers", None)
    quadrature = opts.get("quadrature", False)

    # Only quadrilateral elements are currently supported.
    if cellshape.lower() != "quadrilateral":
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. Supplied "
            f"cellshape must be set to 'quadrilateral' but found "
            f"'{cellshape}'.")

    # The element order, when given, must be 0 or a positive integer.
    order_invalid = (not isinstance(element_order, int) or
                     element_order < 0)
    if element_order is not None and order_invalid:
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. The "
            f"element_order argument must be >= 0 but found "
            f"'{element_order}'.")

    # The number of layers, when given, must be a positive integer.
    layers_invalid = (not isinstance(number_of_layers, int) or
                      number_of_layers < 1)
    if number_of_layers is not None and layers_invalid:
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. The "
            f"number_of_layers argument must be > 0 but found "
            f"'{number_of_layers}'.")

    # The quadrature flag must be a boolean value.
    if quadrature not in (True, False):
        raise TransformationError(
            f"Error in Dynamo0p3KernelConstTrans transformation. The "
            f"quadrature argument must be boolean but found "
            f"'{quadrature}'.")

    # As a minimum, element order or number of layers must have values.
    if element_order is None and not number_of_layers:
        raise TransformationError(
            "Error in Dynamo0p3KernelConstTrans transformation. At least "
            "one of element_order or number_of_layers must be set "
            "otherwise this transformation does nothing.")

    # Constant quadrature sizes are derived from the element order, so
    # quadrature=True requires element_order to be given.
    if quadrature and element_order is None:
        raise TransformationError(
            "Error in Dynamo0p3KernelConstTrans transformation. If "
            "quadrature is set then element_order must also be set (as "
            "the values of the former are derived from the latter.")
2387 
2388 
2390  '''
2391  Adds an OpenACC "enter data" directive to a Schedule.
2392  For example:
2393 
2394  >>> from psyclone.parse.algorithm import parse
2395  >>> from psyclone.psyGen import PSyFactory
2396  >>> api = "gocean1.0"
2397  >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
2398  >>> psy = PSyFactory(api).create(invokeInfo)
2399  >>>
2400  >>> from psyclone.transformations import \
2401  ACCEnterDataTrans, ACCLoopTrans, ACCParallelTrans
2402  >>> dtrans = ACCEnterDataTrans()
2403  >>> ltrans = ACCLoopTrans()
2404  >>> ptrans = ACCParallelTrans()
2405  >>>
2406  >>> schedule = psy.invokes.get('invoke_0').schedule
2407  >>> # Uncomment the following line to see a text view of the schedule
2408  >>> # print(schedule.view())
2409  >>>
2410  >>> # Apply the OpenACC Loop transformation to *every* loop in the schedule
2411  >>> for child in schedule.children[:]:
2412  ... ltrans.apply(child)
2413  >>>
2414  >>> # Enclose all of these loops within a single OpenACC parallel region
2415  >>> ptrans.apply(schedule)
2416  >>>
2417  >>> # Add an enter data directive
2418  >>> dtrans.apply(schedule)
2419  >>>
2420  >>> # Uncomment the following line to see a text view of the schedule
2421  >>> # print(schedule.view())
2422 
2423  '''
2424  def __str__(self):
2425  return "Adds an OpenACC 'enter data' directive"
2426 
2427  @property
2428  def name(self):
2429  '''
2430  :returns: the name of this transformation.
2431  :rtype: str
2432  '''
2433  return "ACCEnterDataTrans"
2434 
2435  def apply(self, sched, options=None):
2436  # pylint: disable=arguments-renamed
2437  '''Adds an OpenACC "enter data" directive to the invoke associated
2438  with the supplied Schedule. Any fields accessed by OpenACC kernels
2439  within this schedule will be added to this data region in
2440  order to ensure they remain on the target device.
2441 
2442  :param sched: schedule to which to add an "enter data" directive.
2443  :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule`
2444  :param options: a dictionary with options for transformations.
2445  :type options: Optional[Dict[str, Any]]
2446 
2447  '''
2448  # Ensure that the proposed transformation is valid
2449  self.validatevalidatevalidate(sched, options)
2450 
2451  # pylint: disable=import-outside-toplevel
2452  if isinstance(sched, DynInvokeSchedule):
2453  from psyclone.dynamo0p3 import DynACCEnterDataDirective as \
2454  AccEnterDataDir
2455  elif isinstance(sched, GOInvokeSchedule):
2456  from psyclone.gocean1p0 import GOACCEnterDataDirective as \
2457  AccEnterDataDir
2458  elif isinstance(sched, NemoInvokeSchedule):
2459  from psyclone.nemo import NemoACCEnterDataDirective as \
2460  AccEnterDataDir
2461  else:
2462  # Should not get here provided that validate() has done its job
2463  raise InternalError(
2464  f"ACCEnterDataTrans.validate() has not rejected an "
2465  f"(unsupported) schedule of type {type(sched)}")
2466 
2467  # Find the position of the first child statement of the current
2468  # schedule which contains an OpenACC compute construct.
2469  posn = 0
2470  directive_cls = (ACCParallelDirective, ACCKernelsDirective)
2471  directive = sched.walk(directive_cls, stop_type=directive_cls)
2472  if directive:
2473  current = directive[0]
2474  while current not in sched.children:
2475  current = current.parent
2476  posn = sched.children.index(current)
2477 
2478  # Add the directive at the position determined above, i.e. just before
2479  # the first statement containing an OpenACC compute construct.
2480  data_dir = AccEnterDataDir(parent=sched, children=[])
2481  sched.addchild(data_dir, index=posn)
2482 
2483  def validate(self, sched, options=None):
2484  # pylint: disable=arguments-differ, arguments-renamed
2485  '''
2486  Check that we can safely apply the OpenACC enter-data transformation
2487  to the supplied Schedule.
2488 
2489  :param sched: Schedule to which to add an "enter data" directive.
2490  :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule`
2491  :param options: a dictionary with options for transformations.
2492  :type options: Optional[Dict[str, Any]]
2493 
2494  :raises TransformationError: if passed something that is not a \
2495  (subclass of) :py:class:`psyclone.psyir.nodes.Schedule`.
2496 
2497  '''
2498  super().validate(sched, options)
2499 
2500  if not isinstance(sched, Schedule):
2501  raise TransformationError("Cannot apply an OpenACC enter data "
2502  "directive to something that is not a "
2503  "Schedule")
2504 
2505  # Check that we don't already have a data region of any sort
2506  directive_cls = (ACCDataDirective, ACCEnterDataDirective)
2507  if sched.walk(directive_cls, stop_type=directive_cls):
2508  raise TransformationError("Schedule already has an OpenACC data "
2509  "region - cannot add an enter data.")
2510 
2511 
2513  '''
2514  Transform a kernel or routine by adding a "!$acc routine" directive
2515  (causing it to be compiled for the OpenACC accelerator device).
2516  For example:
2517 
2518  >>> from psyclone.parse.algorithm import parse
2519  >>> from psyclone.psyGen import PSyFactory
2520  >>> api = "gocean1.0"
2521  >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
2522  >>> psy = PSyFactory(api).create(invokeInfo)
2523  >>>
2524  >>> from psyclone.transformations import ACCRoutineTrans
2525  >>> rtrans = ACCRoutineTrans()
2526  >>>
2527  >>> schedule = psy.invokes.get('invoke_0').schedule
2528  >>> # Uncomment the following line to see a text view of the schedule
2529  >>> # print(schedule.view())
2530  >>> kern = schedule.children[0].children[0].children[0]
2531  >>> # Transform the kernel
2532  >>> rtrans.apply(kern)
2533 
2534  '''
2535  @property
2536  def name(self):
2537  '''
2538  :returns: the name of this transformation class.
2539  :rtype: str
2540  '''
2541  return "ACCRoutineTrans"
2542 
2543  def apply(self, node, options=None):
2544  '''
2545  Add the '!$acc routine' OpenACC directive into the code of the
2546  supplied Kernel (in a PSyKAl API such as GOcean or LFRic) or directly
2547  in the supplied Routine.
2548 
2549  :param node: the kernel call or routine implementation to transform.
2550  :type node: :py:class:`psyclone.psyGen.Kern` or \
2551  :py:class:`psyclone.psyir.nodes.Routine`
2552  :param options: a dictionary with options for transformations.
2553  :type options: Optional[Dict[str, Any]]
2554 
2555  '''
2556  # Check that we can safely apply this transformation
2557  self.validatevalidatevalidate(node, options)
2558 
2559  if isinstance(node, Kern):
2560  # Flag that the kernel has been modified
2561  node.modified = True
2562 
2563  # Get the schedule representing the kernel subroutine
2564  routine = node.get_kernel_schedule()
2565  else:
2566  routine = node
2567 
2568  # Insert the directive to the routine if it doesn't already exist
2569  for child in routine.children:
2570  if isinstance(child, ACCRoutineDirective):
2571  return # The routine is already marked with ACCRoutine
2572  routine.children.insert(0, ACCRoutineDirective())
2573 
2574  def validate(self, node, options=None):
2575  '''
2576  Perform checks that the supplied kernel or routine can be transformed.
2577 
2578  :param node: the kernel or routine which is the target of this
2579  transformation.
2580  :type node: :py:class:`psyclone.psyGen.Kern` |
2581  :py:class:`psyclone.psyir.nodes.Routine`
2582  :param options: a dictionary with options for transformations.
2583  :type options: Optional[Dict[str, Any]]
2584  :param bool options["force"]: whether to allow routines with
2585  CodeBlocks to run on the GPU.
2586 
2587  :raises TransformationError: if the node is not a kernel or a routine.
2588  :raises TransformationError: if the target is a built-in kernel.
2589  :raises TransformationError: if it is a kernel but without an
2590  associated PSyIR.
2591  :raises TransformationError: if any of the symbols in the kernel are
2592  accessed via a module use statement.
2593  :raises TransformationError: if the kernel contains any calls to other
2594  routines.
2595  '''
2596  super().validate(node, options)
2597 
2598  self.validate_it_can_run_on_gpuvalidate_it_can_run_on_gpu(node, options)
2599 
2600 
2602  '''
2603  Enclose a sub-set of nodes from a Schedule within an OpenACC kernels
2604  region (i.e. within "!$acc kernels" ... "!$acc end kernels" directives).
2605 
2606  For example:
2607 
2608  >>> from psyclone.parse.algorithm import parse
2609  >>> from psyclone.psyGen import PSyFactory
2610  >>> api = "nemo"
2611  >>> ast, invokeInfo = parse(NEMO_SOURCE_FILE, api=api)
2612  >>> psy = PSyFactory(api).create(invokeInfo)
2613  >>>
2614  >>> from psyclone.transformations import ACCKernelsTrans
2615  >>> ktrans = ACCKernelsTrans()
2616  >>>
2617  >>> schedule = psy.invokes.get('tra_adv').schedule
2618  >>> # Uncomment the following line to see a text view of the schedule
2619  >>> # print(schedule.view())
2620  >>> kernels = schedule.children[9]
2621  >>> # Transform the kernel
2622  >>> ktrans.apply(kernels)
2623 
2624  '''
2625  excluded_node_types = (CodeBlock, Return, PSyDataNode,
2627 
2628  @property
2629  def name(self):
2630  '''
2631  :returns: the name of this transformation class.
2632  :rtype: str
2633  '''
2634  return "ACCKernelsTrans"
2635 
2636  def apply(self, node, options=None):
2637  '''
2638  Enclose the supplied list of PSyIR nodes within an OpenACC
2639  Kernels region.
2640 
2641  :param node: a node or list of nodes in the PSyIR to enclose.
2642  :type node: (a list of) :py:class:`psyclone.psyir.nodes.Node`
2643  :param options: a dictionary with options for transformations.
2644  :type options: Optional[Dict[str, Any]]
2645  :param bool options["default_present"]: whether or not the kernels \
2646  region should have the 'default present' attribute (indicating \
2647  that data is already on the accelerator). When using managed \
2648  memory this option should be False.
2649 
2650  '''
2651  # Ensure we are always working with a list of nodes, even if only
2652  # one was supplied via the `node` argument.
2653  node_list = self.get_node_listget_node_list(node)
2654 
2655  self.validatevalidatevalidatevalidate(node_list, options)
2656 
2657  parent = node_list[0].parent
2658  start_index = node_list[0].position
2659 
2660  if not options:
2661  options = {}
2662  default_present = options.get("default_present", False)
2663 
2664  # Create a directive containing the nodes in node_list and insert it.
2665  directive = ACCKernelsDirective(
2666  parent=parent, children=[node.detach() for node in node_list],
2667  default_present=default_present)
2668 
2669  parent.children.insert(start_index, directive)
2670 
2671  def validate(self, nodes, options):
2672  # pylint: disable=signature-differs
2673  '''
2674  Check that we can safely enclose the supplied node or list of nodes
2675  within OpenACC kernels ... end kernels directives.
2676 
2677  :param nodes: the proposed PSyIR node or nodes to enclose in the \
2678  kernels region.
2679  :type nodes: (list of) :py:class:`psyclone.psyir.nodes.Node`
2680  :param options: a dictionary with options for transformations.
2681  :type options: Optional[Dict[str, Any]]
2682 
2683  :raises NotImplementedError: if the supplied Nodes belong to \
2684  a GOInvokeSchedule.
2685  :raises TransformationError: if there are no Loops within the \
2686  proposed region.
2687 
2688  '''
2689  # Ensure we are always working with a list of nodes, even if only
2690  # one was supplied via the `nodes` argument.
2691  node_list = self.get_node_listget_node_list(nodes)
2692 
2693  # Check that the front-end is valid
2694  sched = node_list[0].ancestor((NemoInvokeSchedule, DynInvokeSchedule))
2695  if not sched:
2696  raise NotImplementedError(
2697  "OpenACC kernels regions are currently only supported for the "
2698  "nemo and dynamo0.3 front-ends")
2699  super().validate(node_list, options)
2700 
2701  # Check that we have at least one loop or array range within
2702  # the proposed region
2703  for node in node_list:
2704  if (any(assign for assign in node.walk(Assignment)
2705  if assign.is_array_assignment) or node.walk(Loop)):
2706  break
2707  else:
2708  # Branch executed if loop does not exit with a break
2709  raise TransformationError(
2710  "A kernels transformation must enclose at least one loop or "
2711  "array range but none were found.")
2712 
2713 
2715  '''
2716  Add an OpenACC data region around a list of nodes in the PSyIR.
2717  COPYIN, COPYOUT and COPY clauses are added as required.
2718 
2719  For example:
2720 
2721  >>> from psyclone.parse.algorithm import parse
2722  >>> from psyclone.psyGen import PSyFactory
2723  >>> api = "nemo"
2724  >>> ast, invokeInfo = parse(NEMO_SOURCE_FILE, api=api)
2725  >>> psy = PSyFactory(api).create(invokeInfo)
2726  >>>
2727  >>> from psyclone.transformations import ACCKernelsTrans, ACCDataTrans
2728  >>> ktrans = ACCKernelsTrans()
2729  >>> dtrans = ACCDataTrans()
2730  >>>
2731  >>> schedule = psy.invokes.get('tra_adv').schedule
2732  >>> # Uncomment the following line to see a text view of the schedule
2733  >>> # print(schedule.view())
2734  >>>
2735  >>> # Add a kernels construct for execution on the device
2736  >>> kernels = schedule.children[9]
2737  >>> ktrans.apply(kernels)
2738  >>>
2739  >>> # Enclose the kernels in a data construct
2740  >>> kernels = schedule.children[9]
2741  >>> dtrans.apply(kernels)
2742 
2743  '''
    # Node types that may not appear inside an OpenACC data region.
    excluded_node_types = (CodeBlock, Return, PSyDataNode)
2745 
2746  @property
2747  def name(self):
2748  '''
2749  :returns: the name of this transformation.
2750  :rtype: str
2751 
2752  '''
2753  return "ACCDataTrans"
2754 
2755  def apply(self, node, options=None):
2756  '''
2757  Put the supplied node or list of nodes within an OpenACC data region.
2758 
2759  :param node: the PSyIR node(s) to enclose in the data region.
2760  :type node: (list of) :py:class:`psyclone.psyir.nodes.Node`
2761  :param options: a dictionary with options for transformations.
2762  :type options: Optional[Dict[str, Any]]
2763 
2764  '''
2765  # Ensure we are always working with a list of nodes, even if only
2766  # one was supplied via the `node` argument.
2767  node_list = self.get_node_listget_node_list(node)
2768 
2769  self.validatevalidatevalidatevalidate(node_list, options)
2770 
2771  parent = node_list[0].parent
2772  start_index = node_list[0].position
2773 
2774  # Create a directive containing the nodes in node_list and insert it.
2775  directive = ACCDataDirective(
2776  parent=parent, children=[node.detach() for node in node_list])
2777 
2778  parent.children.insert(start_index, directive)
2779 
2780  def validate(self, nodes, options):
2781  # pylint: disable=signature-differs
2782  '''
2783  Check that we can safely add a data region around the supplied list
2784  of nodes.
2785 
2786  :param nodes: the proposed node(s) to enclose in a data region.
2787  :type nodes: List[:py:class:`psyclone.psyir.nodes.Node`] |
2788  :py:class:`psyclone.psyir.nodes.Node`
2789  :param options: a dictionary with options for transformations.
2790  :type options: Optional[Dict[str, Any]]
2791 
2792  :raises TransformationError: if the Schedule to which the nodes
2793  belong already has an 'enter data' directive.
2794  :raises TransformationError: if any of the nodes are themselves
2795  data directives.
2796  :raises TransformationError: if an array of structures needs to be
2797  deep copied (this is not currently supported).
2798 
2799  '''
2800  # Ensure we are always working with a list of nodes, even if only
2801  # one was supplied via the `nodes` argument.
2802  node_list = self.get_node_listget_node_list(nodes)
2803 
2804  super().validate(node_list, options)
2805 
2806  # Check that the Schedule to which the nodes belong does not already
2807  # have an 'enter data' directive.
2808  schedule = node_list[0].root
2809  acc_dirs = schedule.walk(ACCEnterDataDirective)
2810  if acc_dirs:
2811  raise TransformationError(
2812  "Cannot add an OpenACC data region to a schedule that "
2813  "already contains an 'enter data' directive.")
2814  # Check that we don't have any accesses to arrays of derived types
2815  # that we can't yet deep copy.
2816  for node in node_list:
2817  for sref in node.walk(StructureReference):
2818 
2819  # Find the loop variables for all Loops that contain this
2820  # access and are themselves within the data region.
2821  loop_vars = []
2822  cursor = sref.ancestor(Loop, limit=node)
2823  while cursor:
2824  loop_vars.append(Signature(cursor.variable.name))
2825  cursor = cursor.ancestor(Loop)
2826 
2827  # Now check whether any of these loop variables appear within
2828  # the structure reference.
2829  # Loop over each component of the structure reference that is
2830  # an array access.
2831  array_accesses = sref.walk(ArrayMixin)
2832  for access in array_accesses:
2833  if not isinstance(access, StructureMember):
2834  continue
2835  var_accesses = VariablesAccessInfo(access.indices)
2836  for var in loop_vars:
2837  if var not in var_accesses.all_signatures:
2838  continue
2839  # For an access such as my_struct(ii)%my_array(ji)
2840  # then if we're inside a loop over it we would actually
2841  # need a loop to do the deep copy:
2842  # do ii = 1, N
2843  # !$acc data copyin(my_struct(ii)%my_array)
2844  # end do
2845  raise TransformationError(
2846  f"Data region contains a structure access "
2847  f"'{sref.debug_string()}' where component "
2848  f"'{access.name}' is an array and is iterated over"
2849  f" (variable '{var}'). Deep copying of data for "
2850  f"structures is only supported where the deepest "
2851  f"component is the one being iterated over.")
2852 
2853 
2855  '''
2856  Transformation that removes any accesses of imported data from the supplied
2857  kernel and places them in the caller. The values/references are then passed
2858  by argument into the kernel.
2859  '''
2860  @property
2861  def name(self):
2862  '''
2863  :returns: the name of this transformation.
2864  :rtype: str
2865  '''
2866  return "KernelImportsToArguments"
2867 
2868  def __str__(self):
2869  return ("Convert the imported variables used inside the kernel "
2870  "into arguments and modify the InvokeSchedule to pass them"
2871  " in the kernel call.")
2872 
2873  def validate(self, node, options=None):
2874  '''
2875  Check that the supplied node is a valid target for this transformation.
2876 
2877  :param node: the PSyIR node to validate.
2878  :type node: :py:class:`psyclone.psyGen.CodedKern`
2879  :param options: a dictionary with options for transformations.
2880  :type options: Optional[Dict[str, Any]]
2881 
2882  :raises TransformationError: if the supplied node is not a CodedKern.
2883  :raises TransformationError: if this transformation is not applied to \
2884  a Gocean API Invoke.
2885  :raises TransformationError: if the supplied kernel contains wildcard \
2886  imports of symbols from one or more containers (e.g. a USE without\
2887  an ONLY clause in Fortran).
2888  '''
2889  if not isinstance(node, CodedKern):
2890  raise TransformationError(
2891  f"The {self.name} transformation can only be applied to "
2892  f"CodedKern nodes but found '{type(node).__name__}' instead.")
2893 
2894  invoke_schedule = node.ancestor(InvokeSchedule)
2895  if not isinstance(invoke_schedule, GOInvokeSchedule):
2896  raise TransformationError(
2897  f"The {self.name} transformation is currently only supported "
2898  f"for the GOcean API but got an InvokeSchedule of type: "
2899  f"'{type(invoke_schedule).__name__}'")
2900 
2901  # Check that there are no unqualified imports or undeclared symbols
2902  try:
2903  kernel = node.get_kernel_schedule()
2904  except SymbolError as err:
2905  raise TransformationError(
2906  f"Kernel '{node.name}' contains undeclared symbol: "
2907  f"{err.value}") from err
2908 
2909  symtab = kernel.symbol_table
2910  for container in symtab.containersymbols:
2911  if container.wildcard_import:
2912  raise TransformationError(
2913  f"Kernel '{node.name}' has a wildcard import of symbols "
2914  f"from container '{container.name}'. This is not "
2915  f"supported.")
2916 
2917  # TODO #649. Check for variables accessed by the kernel but declared
2918  # in an outer scope.
2919 
2920  def apply(self, node, options=None):
2921  '''
2922  Convert the imported variables used inside the kernel into arguments
2923  and modify the InvokeSchedule to pass the same imported variables to
2924  the kernel call.
2925 
2926  :param node: a kernel call.
2927  :type node: :py:class:`psyclone.psyGen.CodedKern`
2928  :param options: a dictionary with options for transformations.
2929  :type options: Optional[Dict[str, Any]]
2930 
2931  '''
2932 
2933  self.validatevalidatevalidate(node, options)
2934 
2935  kernel = node.get_kernel_schedule()
2936  symtab = kernel.symbol_table
2937  invoke_symtab = node.ancestor(InvokeSchedule).symbol_table
2938  count_imported_vars_removed = 0
2939 
2940  # Transform each imported variable into an argument.
2941  # TODO #11: When support for logging is added, we could warn the user
2942  # if no imports are found in the kernel.
2943  for imported_var in kernel.symbol_table.imported_symbols[:]:
2944  count_imported_vars_removed += 1
2945 
2946  # Resolve the data type information if it is not available
2947  # pylint: disable=unidiomatic-typecheck
2948  if (type(imported_var) is Symbol or
2949  isinstance(imported_var.datatype, UnresolvedType)):
2950  updated_sym = imported_var.resolve_type()
2951  # If we have a new symbol then we must update the symbol table
2952  if updated_sym is not imported_var:
2953  kernel.symbol_table.swap(imported_var, updated_sym)
2954  # pylint: enable=unidiomatic-typecheck
2955 
2956  # Copy the imported symbol into the InvokeSchedule SymbolTable
2957  invoke_symtab.copy_external_import(
2958  updated_sym, tag="AlgArgs_" + updated_sym.name)
2959 
2960  # Keep a reference to the original container so that we can
2961  # update it after the interface has been updated.
2962  container = updated_sym.interface.container_symbol
2963 
2964  # Convert the symbol to an argument and add it to the argument list
2965  current_arg_list = symtab.argument_list
2966  # An argument does not have an initial value.
2967  was_constant = updated_sym.is_constant
2968  updated_sym.is_constant = False
2969  updated_sym.initial_value = None
2970  if was_constant:
2971  # Imported constants lose the constant value but are read-only
2972  # TODO: When #633 and #11 are implemented, warn the user that
2973  # they should transform the constants to literal values first.
2974  updated_sym.interface = ArgumentInterface(
2975  ArgumentInterface.Access.READ)
2976  else:
2977  updated_sym.interface = ArgumentInterface(
2978  ArgumentInterface.Access.READWRITE)
2979  current_arg_list.append(updated_sym)
2980  symtab.specify_argument_list(current_arg_list)
2981 
2982  # Convert PSyIR DataTypes to Gocean VALID_SCALAR_TYPES
2983  # TODO #678: Ideally this strings should be provided by the GOcean
2984  # API configuration.
2985  go_space = ""
2986  if updated_sym.datatype.intrinsic == ScalarType.Intrinsic.REAL:
2987  go_space = "go_r_scalar"
2988  elif (updated_sym.datatype.intrinsic ==
2989  ScalarType.Intrinsic.INTEGER):
2990  go_space = "go_i_scalar"
2991  else:
2992  raise TypeError(
2993  f"The imported variable '{updated_sym.name}' could not be "
2994  f"promoted to an argument because the GOcean "
2995  f"infrastructure does not have any scalar type equivalent "
2996  f"to the PSyIR {updated_sym.datatype} type.")
2997 
2998  # Add the imported variable in the call argument list
2999  node.arguments.append(updated_sym.name, go_space)
3000 
3001  # Check whether we still need the Container symbol from which
3002  # this import was originally accessed
3003  if not kernel.symbol_table.symbols_imported_from(container) and \
3004  not container.wildcard_import:
3005  kernel.symbol_table.remove(container)
3006 
3007  if count_imported_vars_removed > 0:
3008  node.modified = True
3009 
3010 
# For Sphinx AutoAPI documentation generation: the public transformations
# exported by this module.
__all__ = [
    "ACCEnterDataTrans",
    "ACCDataTrans",
    "ACCKernelsTrans",
    "ACCLoopTrans",
    "ACCParallelTrans",
    "ACCRoutineTrans",
    "ColourTrans",
    "Dynamo0p3AsyncHaloExchangeTrans",
    "Dynamo0p3ColourTrans",
    "Dynamo0p3KernelConstTrans",
    "Dynamo0p3OMPLoopTrans",
    "Dynamo0p3RedundantComputationTrans",
    "DynamoOMPParallelLoopTrans",
    "GOceanOMPLoopTrans",
    "GOceanOMPParallelLoopTrans",
    "KernelImportsToArguments",
    "MoveTrans",
    "OMPLoopTrans",
    "OMPMasterTrans",
    "OMPParallelLoopTrans",
    "OMPParallelTrans",
    "OMPSingleTrans",
    "ParallelRegionTrans",
]
def validate(self, node, options=None)
Definition: psyGen.py:2799
def validate(self, nodes, options)
def apply(self, node, options=None)
def apply(self, sched, options=None)
def validate(self, sched, options=None)
def apply(self, node, options=None)
def apply(self, node, options=None)
def apply(self, target_nodes, options=None)
def validate(self, node_list, options=None)
def apply(self, node, options=None)
def validate(self, node, options=None)
def apply(self, node, options=None)
def validate(self, node, options=None)
def apply(self, node, location, options=None)
def validate(self, node, location, options=None)
def validate(self, node_list, options=None)
def apply(self, node_list, options=None)
def apply(self, node, options=None)
def apply(self, target_nodes, options=None)
def validate(self, node_list, options=None)