Reference Guide  2.5.0
omp_directives.py
1 # -----------------------------------------------------------------------------
2 # BSD 3-Clause License
3 #
4 # Copyright (c) 2021-2024, Science and Technology Facilities Council.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are met:
9 #
10 # * Redistributions of source code must retain the above copyright notice, this
11 # list of conditions and the following disclaimer.
12 #
13 # * Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
16 #
17 # * Neither the name of the copyright holder nor the names of its
18 # contributors may be used to endorse or promote products derived from
19 # this software without specific prior written permission.
20 #
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 # POSSIBILITY OF SUCH DAMAGE.
33 # -----------------------------------------------------------------------------
34 # Authors R. W. Ford, A. R. Porter, S. Siso and N. Nobre, STFC Daresbury Lab
35 # A. B. G. Chalk, STFC Daresbury Lab
36 # I. Kavcic, Met Office
37 # C.M. Maynard, Met Office / University of Reading
38 # J. Henrichs, Bureau of Meteorology
39 # -----------------------------------------------------------------------------
40 
41 ''' This module contains the implementation of the various OpenMP Directive
42 nodes.'''
43 
44 
45 import abc
46 import itertools
47 import sympy
48 
49 from psyclone.configuration import Config
50 from psyclone.core import AccessType, VariablesAccessInfo
51 from psyclone.errors import (GenerationError,
52  UnresolvedDependencyError)
53 from psyclone.f2pygen import (AssignGen, UseGen, DeclGen, DirectiveGen,
54  CommentGen)
55 from psyclone.psyir.nodes.array_mixin import ArrayMixin
56 from psyclone.psyir.nodes.array_reference import ArrayReference
57 from psyclone.psyir.nodes.assignment import Assignment
58 from psyclone.psyir.nodes.call import Call
59 from psyclone.psyir.nodes.directive import StandaloneDirective, \
60  RegionDirective
61 from psyclone.psyir.nodes.if_block import IfBlock
62 from psyclone.psyir.nodes.intrinsic_call import IntrinsicCall
63 from psyclone.psyir.nodes.literal import Literal
64 from psyclone.psyir.nodes.loop import Loop
65 from psyclone.psyir.nodes.operation import BinaryOperation
66 from psyclone.psyir.nodes.omp_clauses import OMPGrainsizeClause, \
67  OMPNowaitClause, OMPNogroupClause, OMPNumTasksClause, OMPPrivateClause, \
68  OMPDefaultClause, OMPReductionClause, OMPScheduleClause, \
69  OMPFirstprivateClause, OMPDependClause
70 from psyclone.psyir.nodes.ranges import Range
71 from psyclone.psyir.nodes.reference import Reference
72 from psyclone.psyir.nodes.routine import Routine
73 from psyclone.psyir.nodes.schedule import Schedule
74 from psyclone.psyir.nodes.structure_reference import StructureReference
75 from psyclone.psyir.nodes.while_loop import WhileLoop
76 from psyclone.psyir.symbols import INTEGER_TYPE, ScalarType
77 
78 # OMP_OPERATOR_MAPPING is used to determine the operator to use in the
79 # reduction clause of an OpenMP directive.
80 OMP_OPERATOR_MAPPING = {AccessType.SUM: "+"}
81 
82 
class OMPDirective(metaclass=abc.ABCMeta):
    '''
    Mixin that acts as the common base of every OpenMP directive node.

    Having a single shared ancestor makes it possible to select all OpenMP
    directives in one go, for example when traversing the tree with
    `node.walk(OMPDirective)`.

    Sub-classes must list OMPDirective *before* any other Directive node
    base class so that the Python MRO resolves attributes (such as
    `_PREFIX`) to the ones declared here.
    '''
    # Prefix identifying this family of directives.
    _PREFIX = "OMP"
96 
97 
class OMPRegionDirective(OMPDirective, RegionDirective, metaclass=abc.ABCMeta):
    '''
    Base class for all OpenMP region-related directives.

    '''
    def _get_reductions_list(self, reduction_type):
        '''
        Returns the names of all scalars within this region that require a
        reduction of type 'reduction_type'. Returned names will be unique
        and are listed in the order in which they are first encountered.

        TODO #514 - this only works for the PSyKAl APIs currently. It needs
        extending/replacing with the use of the PSyIR Dependence Analysis.

        :param reduction_type: the reduction type (e.g. AccessType.SUM) to
                               search for.
        :type reduction_type: :py:class:`psyclone.core.access_type.AccessType`

        :returns: names of scalar arguments with reduction access.
        :rtype: list[str]

        '''
        # TODO #514: not yet working with generic PSyIR, so skip for now
        if Config.get().api not in ('gocean1.0', 'dynamo0.3'):
            return []

        const = Config.get().api_conf().get_constants()
        result = []
        for call in self.kernels():
            for arg in call.arguments.args:
                # Only scalar arguments with the requested reduction access
                # are of interest, and each name is reported once.
                if (arg.argument_type in const.VALID_SCALAR_NAMES and
                        arg.descriptor.access == reduction_type and
                        arg.name not in result):
                    result.append(arg.name)
        return result
133 
134 
class OMPStandaloneDirective(OMPDirective, StandaloneDirective,
                             metaclass=abc.ABCMeta):
    ''' Base class for all OpenMP-related standalone directives. '''
138 
139 
class OMPDeclareTargetDirective(OMPStandaloneDirective):
    '''
    Class representing an OpenMP Declare Target directive in the PSyIR.

    '''
    def gen_code(self, parent):
        '''Generate the fortran OMP Declare Target Directive and any
        associated code.

        :param parent: the parent Node in the Schedule to which to add our
                       content.
        :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen`
        '''
        # Check the constraints are correct
        self.validate_global_constraints()

        # Generate the code for this Directive
        parent.add(DirectiveGen(parent, "omp", "begin", "declare", "target"))

    def begin_string(self):
        '''Returns the beginning statement of this directive, i.e.
        "omp declare target". The visitor is responsible for adding the
        correct directive beginning (e.g. "!$").

        :returns: the opening statement of this directive.
        :rtype: str

        '''
        return "omp declare target"

    def validate_global_constraints(self):
        '''
        Perform validation checks that can only be done at code-generation
        time.

        :raises GenerationError: if this directive is not the first statement
                                 in a routine.

        '''
        # A directive without a parent is not checked; otherwise it has to
        # be the very first child of a Routine.
        if self.parent and (not isinstance(self.parent, Routine) or
                            self.parent.children[0] is not self):
            raise GenerationError(
                f"A OMPDeclareTargetDirective must be the first child (index "
                f"0) of a Routine but found one as child {self.position} of a "
                f"{type(self.parent).__name__}.")

        super().validate_global_constraints()
187 
188 
class OMPTaskwaitDirective(OMPStandaloneDirective):
    '''
    Class representing an OpenMP TASKWAIT directive in the PSyIR.

    '''
    def validate_global_constraints(self):
        '''
        Perform validation checks that can only be done at code-generation
        time.

        :raises GenerationError: if this OMPTaskwait is not enclosed
                                 within some OpenMP parallel region.

        '''
        # It is only at the point of code generation that we can check for
        # correctness (given that we don't mandate the order that a user
        # can apply transformations to the code). As a Parallel Child
        # directive, we must have an OMPParallelDirective as an ancestor
        # somewhere back up the tree.
        if not self.ancestor(OMPParallelDirective,
                             excluding=OMPParallelDoDirective):
            raise GenerationError(
                "OMPTaskwaitDirective must be inside an OMP parallel region "
                "but could not find an ancestor OMPParallelDirective node")

        super().validate_global_constraints()

    def gen_code(self, parent):
        '''Generate the fortran OMP Taskwait Directive and any associated
        code.

        :param parent: the parent Node in the Schedule to which to add our
                       content.
        :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen`
        '''
        # Check the constraints are correct
        self.validate_global_constraints()

        # Generate the code for this Directive
        parent.add(DirectiveGen(parent, "omp", "begin", "taskwait", ""))
        # No children or end code for this node

    def begin_string(self):
        '''Returns the beginning statement of this directive, i.e.
        "omp taskwait". The visitor is responsible for adding the
        correct directive beginning (e.g. "!$").

        :returns: the opening statement of this directive.
        :rtype: str

        '''
        return "omp taskwait"
241 
242 
243 class OMPSerialDirective(OMPRegionDirective, metaclass=abc.ABCMeta):
244  '''
245  Abstract class representing OpenMP serial regions, e.g.
246  OpenMP SINGLE or OpenMP Master.
247 
248  '''
249 
def _valid_dependence_literals(self, lit1, lit2):
    '''
    Decides whether a pair of array-index nodes, at least one of which is
    a Literal, forms a dependency that can be expressed in OpenMP. The
    pair is only accepted when *both* nodes are Literals.

    :param lit1: the first node to compare.
    :type lit1: :py:class:`psyclone.psyir.nodes.Node`
    :param lit2: the second node to compare.
    :type lit2: :py:class:`psyclone.psyir.nodes.Node`

    :returns: whether or not these two nodes can be used as a valid
              dependency pair in OpenMP.
    :rtype: bool

    '''
    # A Literal index paired with a non-Literal index is not currently
    # supported in PSyclone, so such a pair is rejected.
    if not isinstance(lit1, Literal):
        return False
    # The Literal values themselves need not match: a(1) is simply not a
    # dependency of a(2), so any two Literals are acceptable.
    return isinstance(lit2, Literal)
276 
def _valid_dependence_ranges(self, arraymixin1, arraymixin2, index):
    '''
    Decides whether two ArrayMixin nodes form a valid dependency pair at
    the given index position. When at least one of them has a Range at
    that position, both must have a Range there and both Ranges must be
    full ranges, i.e. ":".

    :param arraymixin1: the first node to validate.
    :type arraymixin1: :py:class:`psyclone.psyir.nodes.ArrayMixin`
    :param arraymixin2: the second node to validate.
    :type arraymixin2: :py:class:`psyclone.psyir.nodes.ArrayMixin`
    :param int index: the position in the index list of both nodes that
                      is to be compared.

    :returns: whether or not these two nodes can be used as a valid
              dependency pair in OpenMP, based upon the provided index.
    :rtype: bool
    '''
    # Both inputs are known to be ArrayMixin nodes as this is a private
    # function. A Range paired with a non-Range index is not currently
    # supported in PSyclone.
    if not isinstance(arraymixin1.indices[index], Range):
        return False
    if not isinstance(arraymixin2.indices[index], Range):
        return False

    # A Range that does not cover the full extent of the array is not
    # currently supported either (due to OpenMP limitations), so both
    # Ranges have to be full ranges.
    if not arraymixin1.is_full_range(index):
        return False
    return arraymixin2.is_full_range(index)
309 
def _compute_accesses_get_start_stop_step(self, preceding_nodes, task,
                                          symbol):
    '''
    Computes the start, stop and step values used in the _compute_accesses
    function by searching through the preceding nodes for the last access
    to the symbol.

    If the symbol was last set by an Assignment, only `start` is set (to a
    copy of the right-hand side). If it is the variable of an ancestor
    Loop of the task, all three values are taken from that Loop.

    :param preceding_nodes: a list of nodes that precede the task in the
                            tree.
    :type preceding_nodes: List[:py:class:`psyclone.psyir.nodes.Node`]
    :param task: the OMPTaskDirective node being used in
                 _compute_accesses.
    :type task: :py:class:`psyclone.psyir.nodes.OMPTaskDirective`
    :param symbol: the symbol used by the ref in _compute_accesses.
    :type symbol: :py:class:`psyclone.psyir.symbols.Symbol`

    :raises UnresolvedDependencyError: if preceding_nodes contains a
                                       non-intrinsic Call node.
    :raises UnresolvedDependencyError: if the symbol is the variable of a
                                       Loop that is not an ancestor of the
                                       task.

    :returns: a tuple containing the start, stop and step nodes (or None
              if there is no value).
    :rtype: Tuple[Union[:py:class:`psyclone.psyir.nodes.Node`, None]]
    '''
    start = None
    stop = None
    step = None
    for node in preceding_nodes:
        # Only Assignment, Loop or Call nodes can modify the symbol in our
        # Reference
        if not isinstance(node, (Assignment, Loop, Call)):
            continue
        # At the moment we allow all IntrinsicCall nodes through, and
        # assume that all IntrinsicCall nodes we find don't modify
        # symbols, but only read from them.
        if isinstance(node, Call) and not isinstance(node, IntrinsicCall):
            # Currently opting to fail on any non-intrinsic Call.
            # Potentially it might be possible to check if the Symbol is
            # written to and only if so then raise an error
            raise UnresolvedDependencyError(
                "Found a Call in preceding_nodes, which "
                "is not yet supported.")
        if isinstance(node, Assignment) and node.lhs.symbol == symbol:
            start = node.rhs.copy()
            break
        if isinstance(node, Loop) and node.variable == symbol:
            # If the loop is not an ancestor of the task then
            # we don't currently support it.
            ancestor_loop = task.ancestor(Loop, limit=self)
            is_ancestor = False
            while ancestor_loop is not None:
                if ancestor_loop == node:
                    is_ancestor = True
                    break
                ancestor_loop = ancestor_loop.ancestor(Loop, limit=self)
            if not is_ancestor:
                raise UnresolvedDependencyError(
                    f"Found a dependency index that "
                    f"was updated as a Loop variable "
                    f"that is not an ancestor Loop of "
                    f"the task. The variable is "
                    f"'{node.variable.name}'.")
            # It has to be an ancestor loop, so we want to find the start,
            # stop and step Nodes
            start, stop, step = node.start_expr, node.stop_expr, \
                node.step_expr
            break
    return (start, stop, step)
374 
def _compute_accesses(self, ref, preceding_nodes, task):
    '''
    Computes the set of accesses for a Reference or BinaryOperation
    Node, based upon the preceding_nodes and containing task.

    The returned result is either a list of nodes (Literals when the
    bounds of the controlling loop are Literals, or a copy of the
    right-hand side of the Assignment that last set the symbol), or a
    dict with "start", "stop" and "step" entries when the bounds are not
    all Literals.

    If ref is a BinaryOperation, it needs to be of the following formats:
    1. Reference ADD/SUB Literal
    2. Literal ADD Reference
    3. Binop(Literal MUL Literal) ADD Reference
    4. Reference ADD/SUB Binop(Literal MUL Literal)


    :param ref: the Reference or BinaryOperation node to compute
                accesses for.
    :type ref: Union[:py:class:`psyclone.psyir.nodes.Reference`,
                     :py:class:`psyclone.psyir.nodes.BinaryOperation`]
    :param preceding_nodes: a list of nodes that precede the task in the
                            tree.
    :type preceding_nodes: List[:py:class:`psyclone.psyir.nodes.Node`]
    :param task: the OMPTaskDirective node containing ref as a child.
    :type task: :py:class:`psyclone.psyir.nodes.OMPTaskDirective`

    :raises UnresolvedDependencyError: If the ref contains an unsupported
                                       BinaryOperation structure, such as
                                       a non-ADD/SUB/MUL operator. Check
                                       error message for more details.
    :raises UnresolvedDependencyError: If preceding_nodes contains a Call
                                       node.
    :raises UnresolvedDependencyError: If ref is a BinaryOperation and
                                       neither child of ref is a Literal
                                       or BinaryOperation.
    :raises UnresolvedDependencyError: If there is a dependency between
                                       ref (a BinaryOperation) and a
                                       previously set constant.
    :raises UnresolvedDependencyError: If there is a dependency between
                                       ref and a Loop variable that is
                                       not an ancestor of task.
    :raises UnresolvedDependencyError: If preceding_nodes contains a
                                       dependent loop with a non-Literal
                                       step.

    :returns: a list of the dependency values for the input ref, or a
              dict of the start, stop and step values.
    :rtype: List[Union[:py:class:`psyclone.psyir.nodes.Literal`,
                       :py:class:`psyclone.psyir.nodes.BinaryOperation`]] or
            Dict[str, :py:class:`psyclone.psyir.nodes.Node`]
    '''
    if isinstance(ref, Reference):
        symbol = ref.symbol
    else:
        # Get the symbol out of the Binop, and store some other
        # important information. We store the step value of the
        # ancestor loop (which will be the value of the Literal, or
        # one of the Literals if an operand is a BinaryOperation).
        # In the case that one of the operands is a BinaryOperation,
        # we also store a "num_entries" value, which is based upon the
        # multiplier of the step value. This is how we can handle
        # cases such as array(i+57) if the step value is 32, the
        # dependencies stored would be array(i+32) and array(i+64).
        # NOTE: num_entries is computed here but is not read again
        # anywhere in this method.
        if isinstance(ref.children[0], Literal):
            if ref.operator == BinaryOperation.Operator.ADD:
                # Have Literal + Reference. Store the symbol of the
                # Reference and the integer value of the Literal.
                symbol = ref.children[1].symbol
                binop_val = int(ref.children[0].value)
                num_entries = 2
            else:
                raise UnresolvedDependencyError(
                    f"Found a dependency index that is "
                    f"a BinaryOperation where the "
                    f"format is Literal OP Reference "
                    f"with a non-ADD operand "
                    f"which is not supported. "
                    f"The operation found was "
                    f"'{ref.debug_string()}'.")
        elif isinstance(ref.children[1], Literal):
            # Have Reference OP Literal. Store the symbol of the
            # Reference, and the integer value of the Literal. If the
            # operator is negative, then we store the value negated.
            if ref.operator in [BinaryOperation.Operator.ADD,
                                BinaryOperation.Operator.SUB]:
                symbol = ref.children[0].symbol
                binop_val = int(ref.children[1].value)
                num_entries = 2
                if ref.operator == BinaryOperation.Operator.SUB:
                    binop_val = -binop_val
            else:
                raise UnresolvedDependencyError(
                    f"Found a dependency index that is "
                    f"a BinaryOperation where the "
                    f"Operator is neither ADD not SUB "
                    f"which is not supported. "
                    f"The operation found was "
                    f"'{ref.debug_string()}'.")

        elif isinstance(ref.children[0], BinaryOperation):
            if ref.operator == BinaryOperation.Operator.ADD:
                # Have Binop ADD Reference. Store the symbol of the
                # Reference, and store the binop. The binop is of
                # structure Literal MUL Literal, where the second
                # Literal is to the step of a parent loop.
                symbol = ref.children[1].symbol
                binop = ref.children[0]
                if binop.operator != BinaryOperation.Operator.MUL:
                    raise UnresolvedDependencyError(
                        f"Found a dependency index that is a "
                        f"BinaryOperation with a child "
                        f"BinaryOperation with a non-MUL operator "
                        f"which is not supported. "
                        f"The operation found was "
                        f"'{ref.debug_string()}'.")
                # These binary operations are format of Literal MUL Literal
                # where step_val is the 2nd literal and the multiplier
                # is the first literal
                if (not (isinstance(binop.children[0], Literal) and
                         isinstance(binop.children[1], Literal))):
                    raise UnresolvedDependencyError(
                        f"Found a dependency index that is a "
                        f"BinaryOperation with a child "
                        f"BinaryOperation with a non-Literal child "
                        f"which is not supported. "
                        f"The operation found was "
                        f"'{ref.debug_string()}'.")
                # We store the step of the parent loop in binop_val, and
                # use the other operand to compute how many entries we
                # need to compute to validate this dependency list.
                binop_val = int(binop.children[1].value)
                num_entries = int(binop.children[0].value)+1
            else:
                raise UnresolvedDependencyError(
                    f"Found a dependency index that is "
                    f"a BinaryOperation where the "
                    f"format is BinaryOperator OP "
                    f"Reference with a non-ADD operand "
                    f"which is not supported. "
                    f"The operation found was "
                    f"'{ref.debug_string()}'.")
        elif isinstance(ref.children[1], BinaryOperation):
            # Have Reference ADD/SUB Binop. Store the symbol of the
            # Reference, and store the binop. The binop is of
            # structure Literal MUL Literal, where the second
            # Literal is to the step of a parent loop.
            if ref.operator in [BinaryOperation.Operator.ADD,
                                BinaryOperation.Operator.SUB]:
                symbol = ref.children[0].symbol
                binop = ref.children[1]
                if binop.operator != BinaryOperation.Operator.MUL:
                    raise UnresolvedDependencyError(
                        f"Found a dependency index that is a "
                        f"BinaryOperation with a child "
                        f"BinaryOperation with a non-MUL operator "
                        f"which is not supported. "
                        f"The operation found was "
                        f"'{ref.debug_string()}'.")
                # These binary operations are format of Literal MUL Literal
                # where step_val is the 2nd literal.
                if (not (isinstance(binop.children[0], Literal) and
                         isinstance(binop.children[1], Literal))):
                    raise UnresolvedDependencyError(
                        f"Found a dependency index that is a "
                        f"BinaryOperation with an operand "
                        f"BinaryOperation with a non-Literal operand "
                        f"which is not supported. "
                        f"The operation found was "
                        f"'{ref.debug_string()}'.")
                # We store the step of the parent loop in binop_val, and
                # use the other operand to compute how many entries we
                # need to compute to validate this dependency list.
                binop_val = int(binop.children[1].value)
                num_entries = int(binop.children[0].value)+1
                if ref.operator == BinaryOperation.Operator.SUB:
                    # If the operator is SUB then we use 1 less
                    # entry in the list, as Fortran arrays start
                    # from 1.
                    binop_val = -binop_val
                    num_entries = num_entries-1
            else:
                raise UnresolvedDependencyError(
                    f"Found a dependency index that is "
                    f"a BinaryOperation where the "
                    f"format is Reference OP "
                    f"BinaryOperation with a non-ADD, "
                    f"non-SUB operand "
                    f"which is not supported. "
                    f"The operation found was "
                    f"'{ref.debug_string()}'.")
        else:
            raise UnresolvedDependencyError(
                f"Found a dependency index that is a "
                f"BinaryOperation where neither child "
                f"is a Literal or BinaryOperation. "
                f"PSyclone can't validate "
                f"this dependency. "
                f"The operation found was "
                f"'{ref.debug_string()}'.")
    start, stop, step = self._compute_accesses_get_start_stop_step(
        preceding_nodes, task, symbol)

    if isinstance(ref, BinaryOperation):
        output_list = []
        if step is None:
            # Found no ancestor loop, PSyclone cannot handle
            # this case, as BinaryOperations created by OMPTaskDirective
            # in dependencies will always be based on ancestor loops.
            raise UnresolvedDependencyError(
                f"Found a dependency between a "
                f"BinaryOperation and a previously "
                f"set constant value. "
                f"PSyclone cannot yet handle this "
                f"interaction. The error occurs from "
                f"'{ref.debug_string()}'.")
        # If the step isn't a Literal value, then we can't compute what
        # the address accesses at compile time, so we can't validate the
        # dependency.
        if not isinstance(step, Literal):
            raise UnresolvedDependencyError(
                f"Found a dependency index that is a "
                f"Loop variable with a non-Literal step "
                f"which we can't resolve in PSyclone. "
                f"Containing node is '{ref.debug_string()}'.")
        # If the start and stop are both Literals, we can compute a set
        # of accesses this BinaryOperation is related to precisely.
        if (isinstance(start, Literal) and isinstance(stop, Literal)):
            # Fill the output list with all values from start to stop
            # incremented by step
            startval = int(start.value)
            stopval = int(stop.value)
            stepval = int(step.value)
            # We loop from startval to stopval + 1 as PSyIR loops will
            # include stopval, whereas Python loops do not.
            for i in range(startval, stopval + 1, stepval):
                new_x = i + binop_val
                output_list.append(Literal(f"{new_x}", INTEGER_TYPE))
            return output_list

        # If they are not all literals, we have a special case. In this
        # case we return a dict containing start, stop and step and this
        # is compared directly to the start, stop and step of a
        # corresponding access.
        output_list = {}
        output_list["start"] = BinaryOperation.create(
            BinaryOperation.Operator.ADD,
            start.copy(),
            Literal(f"{binop_val}", INTEGER_TYPE)
        )
        output_list["stop"] = stop.copy()
        output_list["step"] = step.copy()
        return output_list
    if step is None:
        # Result for an assignment.
        output_list = [start]
        return output_list
    output_list = []
    # If step is not a Literal then we probably can't resolve this
    if not isinstance(step, Literal):
        raise UnresolvedDependencyError(
            "Found a dependency index that is a "
            "Loop variable with a non-Literal step "
            "which we can't resolve in PSyclone.")
    # Special case when all are Literals
    if (isinstance(start, Literal) and isinstance(stop, Literal)):
        # Fill the output list with all values from start to stop
        # incremented by step
        startval = int(start.value)
        stopval = int(stop.value)
        stepval = int(step.value)
        # We loop from startval to stopval + 1 as PSyIR loops will include
        # stopval, whereas Python loops do not.
        for i in range(startval, stopval + 1, stepval):
            output_list.append(Literal(f"{i}", INTEGER_TYPE))
        return output_list

    # If the start, stop and step are not all Literals, we return a
    # description of the sequence only. In this case, we have a non-parent
    # loop reference which is also firstprivate (as shared indices are
    # forbidden in OMPTaskDirective already), so is essentially a
    # constant. In this case therefore we will have an unknown start and
    # stop value, so we verify this dependency differently. To ensure this
    # special case is understood as a special case, we return a dict with
    # the 3 members.
    output_list = {}
    output_list["start"] = start.copy()
    output_list["stop"] = stop.copy()
    output_list["step"] = step.copy()
    return output_list
661 
662  def _check_valid_overlap(self, sympy_ref1s, sympy_ref2s):
663  '''
664  Takes two lists of SymPy expressions, and checks that any overlaps
665  between the expressions is valid for OpenMP depend clauses.
666 
667  :param sympy_ref1s: the list of SymPy expressions corresponding to
668  the first dependency clause.
669  :type sympy_ref1s: List[:py:class:`sympy.core.basic.Basic`]
670  :param sympy_ref2s: the list of SymPy expressions corresponding to
671  the second dependency clause.
672  :type sympy_ref2s: List[:py:class:`sympy.core.basic.Basic`]
673 
674  :returns: whether this is a valid overlap according to the OpenMP \
675  standard.
676  :rtype: bool
677  '''
678  # r1_min will contain the minimum computed value for (ref + value)
679  # from the list. r1_max will contain the maximum computed value.
680  # Loop through the values in sympy_ref1s, and compute the maximum
681  # and minumum values in that list. These correspond to the maximum and
682  # minimum values used for accessing the array relative to the
683  # symbol used as a base access.
684  values = [int(member) for member in sympy_ref1s]
685  r1_min = min(values)
686  r1_max = max(values)
687  # Loop over the elements in sympy_ref2s and check that the dependency
688  # is valid in OpenMP.
689  for member in sympy_ref2s:
690  # If the value is between min and max of r1 then we check that
691  # the value is in the values list
692  val = int(member)
693  if r1_min <= val <= r1_max:
694  if val not in values:
695  # Found incompatible dependency between two
696  # array accesses, ref1 is in range r1_min
697  # to r1_max, but doesn't contain val.
698  # This can happen if we have two loops with
699  # different start values or steps.
700  return False
701  return True
702 
def _valid_dependence_ref_binop(self, ref1, ref2, task1, task2):
    '''
    Compares two Reference/BinaryOperation Nodes to check they are a set
    of dependencies that are valid according to OpenMP. Both these nodes
    are array indices on the same array symbol, so for OpenMP to correctly
    compute this dependency, we must guarantee at compile time that we
    know the addresses/array sections covered by this index are identical.

    Any UnresolvedDependencyError raised while computing the accesses is
    caught here and reported as an invalid dependency (False).

    :param ref1: the first Node to compare.
    :type ref1: Union[:py:class:`psyclone.psyir.nodes.Reference`,
                      :py:class:`psyclone.psyir.nodes.BinaryOperation`]
    :param ref2: the second Node to compare.
    :type ref2: Union[:py:class:`psyclone.psyir.nodes.Reference`,
                      :py:class:`psyclone.psyir.nodes.BinaryOperation`]
    :param task1: the task containing ref1 as a child.
    :type task1: :py:class:`psyclone.psyir.nodes.OMPTaskDirective`
    :param task2: the task containing ref2 as a child.
    :type task2: :py:class:`psyclone.psyir.nodes.OMPTaskDirective`

    :returns: whether or not these two nodes can be used as a valid
              dependency on the same array in OpenMP.
    :rtype: bool

    '''
    # pylint: disable=import-outside-toplevel
    from psyclone.psyir.backend.sympy_writer import SymPyWriter
    # In this case we have two Reference/BinaryOperation as indices.
    # We need to attempt to find their value set and check the value
    # set matches.
    # Find all the nodes before these tasks
    preceding_t1 = task1.preceding(reverse=True)
    preceding_t2 = task2.preceding(reverse=True)
    # Get access list for each ref
    try:
        ref1_accesses = self._compute_accesses(ref1, preceding_t1, task1)
        ref2_accesses = self._compute_accesses(ref2, preceding_t2, task2)
    except UnresolvedDependencyError:
        # If we get a UnresolvedDependencyError from compute_accesses, then
        # we found an access that isn't able to be handled by PSyclone, so
        # dependencies based on it need to be handled by a taskwait
        return False

    # Create our sympy_writer
    sympy_writer = SymPyWriter()

    # If either of the returned accesses are a dict, this is a special
    # case where both must be a dict and have the same start, stop and
    # step.
    if isinstance(ref1_accesses, dict) or isinstance(ref2_accesses, dict):
        # If they aren't both dicts then we need to return False as
        # the special case isn't handled correctly.
        if type(ref1_accesses) is not type(ref2_accesses):
            return False
        # If they're both dicts then we need the step to be equal for
        # this dependency to be satisfiable.
        if ref1_accesses["step"] != ref2_accesses["step"]:
            return False
        # Now we know the step is equal, we need the start values to be
        # start1 = start2 + x * step, where x is an integer value.
        # We use SymPy to solve this equation and perform this check.
        sympy_start1 = sympy_writer(ref1_accesses["start"])
        sympy_start2 = sympy_writer(ref2_accesses["start"])
        sympy_step = sympy_writer(ref2_accesses["step"])
        b_sym = sympy.Symbol('b')
        result = sympy.solvers.solve(sympy_start1 - sympy_start2 +
                                     b_sym * sympy_step, b_sym)
        # An empty result means SymPy found no solution for b, so the
        # alignment cannot be established; treat this conservatively as
        # an invalid dependency rather than indexing into an empty list.
        if not result or not isinstance(result[0],
                                        sympy.core.numbers.Integer):
            return False

        # If we know the start and step are aligned, all possible
        # dependencies are aligned so we don't need to check the stop
        # value.
        return True

    # If we have a list, then we have a set of Literal values.
    # We use the SymPyWriter to convert these objects to expressions
    # we can use to obtain integer values for these Literals
    sympy_ref1s = sympy_writer(ref1_accesses)
    sympy_ref2s = sympy_writer(ref2_accesses)
    return self._check_valid_overlap(sympy_ref1s, sympy_ref2s)
793 
def _check_dependency_pairing_valid(self, node1, node2, task1, task2):
    '''
    Given a pair of nodes which are children of a OMPDependClause, this
    function checks whether the described dependence is correctly
    described by the OpenMP standard.
    If the dependence is not going to be handled safely, this function
    returns False, else it returns True.

    :param node1: the first input node to check.
    :type node1: :py:class:`psyclone.psyir.nodes.Reference`
    :param node2: the second input node to check.
    :type node2: :py:class:`psyclone.psyir.nodes.Reference`
    :param task1: the OMPTaskDirective node containing node1 as a
        dependency
    :type task1: :py:class:`psyclone.psyir.nodes.OMPTaskDirective`
    :param task2: the OMPTaskDirective node containing node2 as a
        dependency
    :type task2: :py:class:`psyclone.psyir.nodes.OMPTaskDirective`

    :returns: whether the dependence is going to be handled safely
        according to the OpenMP standard.
    :rtype: bool
    '''
    # Checking the symbol is the same works. If the symbol is not the same
    # then there's no dependence, so its valid.
    if node1.symbol != node2.symbol:
        return True
    # The typing check handles any edge case where we have node1 and node2
    # pointing to the same symbol, but one is a specialised reference type
    # and the other is a base Reference type - this is unlikely to happen
    # but we check just in case. In this case we have to assume there is
    # an unhandled dependency
    if type(node1) is not type(node2):
        return False
    # For structure reference we need to check they access
    # the same member. If they don't, no dependence so valid.
    if isinstance(node1, StructureReference):
        # If either is a StructureReference here they must both be,
        # as they access the same symbol.

        # We can't just do == on the Member child, as that
        # will recurse and check the array indices for any
        # ArrayMixin children

        # Check the signature of both StructureReference
        # to see if they are accessing the same data
        ref0_sig = node1.get_signature_and_indices()[0]
        ref1_sig = node2.get_signature_and_indices()[0]
        if ref0_sig != ref1_sig:
            return True

    # If we have (exactly) Reference objects we filter out
    # non-matching ones with the symbol check, and matching ones
    # are always valid since they are simple accesses.
    # pylint: disable=unidiomatic-typecheck
    if type(node1) is Reference:
        return True

    # All remaining objects are some sort of Array access.
    # PSyclone will not handle dependencies on multiple array indexes
    # at the moment, so we return False.
    if len(node1.walk(ArrayMixin)) > 1 or len(node2.walk(ArrayMixin)) > 1:
        return False
    if isinstance(node1, ArrayReference):
        array1 = node1
        array2 = node2
    else:
        array1 = node1.walk(ArrayMixin)[0]
        array2 = node2.walk(ArrayMixin)[0]
    for i, index in enumerate(array1.indices):
        other_index = array2.indices[i]
        if isinstance(index, Literal) or isinstance(other_index, Literal):
            valid = self._valid_dependence_literals(index, other_index)
        elif isinstance(index, Range) or isinstance(other_index, Range):
            valid = self._valid_dependence_ranges(array1, array2, i)
        else:
            # The only remaining option is that the indices are
            # References or BinaryOperations
            valid = self._valid_dependence_ref_binop(
                index, other_index, task1, task2)
        # If this was not valid then return False, else keep checking
        # other indices
        if not valid:
            return False

    return valid
886 
def _validate_task_dependencies(self):
    '''
    Validates all task dependencies in this OMPSerialDirective region are
    valid within the constraints of OpenMP & PSyclone. This is done through
    a variety of helper functions, and checks each pair of tasks' inout,
    outin and outout combinations.

    Any task dependencies that are detected and will not be handled by
    OpenMP's depend clause will be handled through the addition of
    OMPTaskwaitDirective nodes.

    :raises NotImplementedError: If this region contains both an
        OMPTaskDirective and an OMPTaskloopDirective.
    '''
    # pylint: disable=import-outside-toplevel
    from psyclone.psyir.nodes.omp_task_directive import OMPTaskDirective
    tasks = self.walk(OMPTaskDirective)
    # For now we disallow Tasks and Taskloop directives in the same Serial
    # Region
    if len(tasks) > 0 and any(self.walk(OMPTaskloopDirective)):
        raise NotImplementedError("OMPTaskDirectives and "
                                  "OMPTaskloopDirectives are not "
                                  "currently supported inside the same "
                                  "parent serial region.")

    pairs = itertools.combinations(tasks, 2)

    # List of tuples of dependent nodes that aren't handled by OpenMP
    unhandled_dependent_nodes = []
    # Lowest and highest position nodes contain the abs_position of each
    # tuple inside unhandled_dependent_nodes, used for sorting the arrays
    # and checking if the unhandled dependency has a taskwait inbetween.
    lowest_position_nodes = []
    highest_position_nodes = []

    for pair in pairs:
        task1 = pair[0]
        task2 = pair[1]

        # Find all References in each tasks' depend clauses
        # Should we cache these instead?
        task1_in = [x for x in task1.input_depend_clause.children
                    if isinstance(x, Reference)]
        task1_out = [x for x in task1.output_depend_clause.children
                     if isinstance(x, Reference)]
        task2_in = [x for x in task2.input_depend_clause.children
                    if isinstance(x, Reference)]
        task2_out = [x for x in task2.output_depend_clause.children
                     if isinstance(x, Reference)]

        inout = list(itertools.product(task1_in, task2_out))
        outin = list(itertools.product(task1_out, task2_in))
        outout = list(itertools.product(task1_out, task2_out))
        # Loop through each potential dependency pair and check they
        # will be handled correctly.

        # Need to predefine satisfiable in case all lists are empty.
        satisfiable = True
        for mem in inout + outin + outout:
            satisfiable = self._check_dependency_pairing_valid(
                mem[0], mem[1], task1, task2)
            # As soon as any is not satisfiable, then we don't need to
            # continue checking.
            if not satisfiable:
                break

        # If we have an unsatisfiable dependency between two tasks, then we
        # need to have a taskwait between them always. We need to loop up
        # to find these tasks' parents which are closest to the Schedule
        # which contains both tasks, and use them as the nodes which are
        # dependent.
        if not satisfiable:
            # Find the lowest schedule containing both nodes.
            schedule1 = task1.ancestor(Schedule, shared_with=task2)
            # Find the closest ancestor to the common schedule.
            task1_proxy = task1
            while task1_proxy.parent is not schedule1:
                task1_proxy = task1_proxy.parent
            task2_proxy = task2
            while task2_proxy.parent is not schedule1:
                task2_proxy = task2_proxy.parent

            # Now we have the closest nodes to the closest common ancestor
            # schedule, so add them to the unhandled_dependent_nodes list.
            if task1_proxy is not task2_proxy:
                # If they end up with the same proxy, they have the same
                # ancestor tree but are in different schedules. This means
                # that they are in something like an if/else block with
                # one node in an if block and the other in the else block.
                # These dependencies we can ignore as they are not ever
                # both executed
                unhandled_dependent_nodes.append(
                    (task1_proxy, task2_proxy))
                lowest_position_nodes.append(
                    min(task1_proxy.abs_position,
                        task2_proxy.abs_position))
                highest_position_nodes.append(
                    max(task1_proxy.abs_position,
                        task2_proxy.abs_position))

    # If we have no invalid dependencies we can return early
    if len(unhandled_dependent_nodes) == 0:
        return

    # Need to sort lists by highest_position_nodes value, and then
    # by lowest value if tied.
    # Based upon
    # https://stackoverflow.com/questions/9764298/how-to-sort-two-
    # lists-which-reference-each-other-in-the-exact-same-way

    # sorted_highest_positions and sorted_lowest_positions contain
    # the abs_positions for the corresponding Nodes in the tuple at
    # the same index in sorted_dependency_pairs. The
    # sorted_dependency_pairs list contains each pair of unhandled
    # dependency nodes that were previously computed, but sorted
    # according to abs_position in the tree.
    sorted_highest_positions, sorted_lowest_positions, \
        sorted_dependency_pairs = (list(t) for t in
                                   zip(*sorted(zip(
                                       highest_position_nodes,
                                       lowest_position_nodes,
                                       unhandled_dependent_nodes)
                                   )))
    # The location of any node where need to place an OMPTaskwaitDirective
    # to ensure code correctness. The size of this list should be
    # minimised during construction as we will not add another
    # OMPTaskwaitDirective when a dependency will be handled already by
    # an existing OMPTaskwaitDirective or one that will be created during
    # this process.
    taskwait_location_nodes = []
    # Stores the abs_position for each of the OMPTaskwaitDirective nodes
    # that does or will exist.
    taskwait_location_abs_pos = []
    for taskwait in self.walk(OMPTaskwaitDirective):
        taskwait_location_nodes.append(taskwait)
        taskwait_location_abs_pos.append(taskwait.abs_position)
    # Add the first node to have a taskwait placed in front of it into the
    # list, unless one of the existing OMPTaskwaitDirective nodes already
    # satisfies the dependency.
    lo_abs_pos = sorted_lowest_positions[0]
    hi_abs_pos = sorted_highest_positions[0]
    for ind, taskwait_loc in enumerate(taskwait_location_nodes):
        if (taskwait_location_abs_pos[ind] <= hi_abs_pos and
                taskwait_location_abs_pos[ind] >= lo_abs_pos):
            # We potentially already satisfy this initial dependency
            if (sorted_dependency_pairs[0][1].ancestor(Schedule) is
                    taskwait_loc.ancestor(Schedule)):
                break
    else:
        # No existing taskwait lies between the two nodes in the same
        # Schedule, so one is needed in front of the later node.
        taskwait_location_nodes.append(sorted_dependency_pairs[0][1])
        taskwait_location_abs_pos.append(sorted_highest_positions[0])

    # The loop variable is deliberately not called 'pairs' so that it does
    # not rebind the combinations iterator used earlier in this method.
    for index, dep_pair in enumerate(sorted_dependency_pairs[1:]):
        # Add 1 to index here because we're looking from [1:]
        lo_abs_pos = sorted_lowest_positions[index+1]
        hi_abs_pos = sorted_highest_positions[index+1]
        for ind, taskwait_loc in enumerate(taskwait_location_nodes):
            if (taskwait_location_abs_pos[ind] <= hi_abs_pos and
                    taskwait_location_abs_pos[ind] >= lo_abs_pos):
                # We have a taskwait meant to be placed here that is
                # potentially already satisfied. To check we need to
                # ensure that the ancestor schedules of the nodes
                # are identical
                if (dep_pair[0].ancestor(Schedule) is
                        taskwait_loc.ancestor(Schedule)):
                    break
        else:
            # If we didn't find a taskwait we plan to add that satisfies
            # this dependency, add it to the list
            taskwait_location_nodes.append(dep_pair[1])
            taskwait_location_abs_pos.append(hi_abs_pos)
    # Now loop through the list in reverse and add taskwaits unless the
    # node is already a taskwait
    taskwait_location_nodes.reverse()
    for taskwait_loc in taskwait_location_nodes:
        if isinstance(taskwait_loc, OMPTaskwaitDirective):
            continue
        node_parent = taskwait_loc.parent
        loc = taskwait_loc.position
        node_parent.addchild(OMPTaskwaitDirective(), loc)
1067 
# NOTE(review): the 'def' line was missing here; the name and the
# argument-free signature are taken from the super() call below - confirm.
def lower_to_language_level(self):
    '''
    Checks that any task dependencies inside this node are valid.
    '''
    # Perform parent ops
    super().lower_to_language_level()

    # Validate any task dependencies in this OMPSerialRegion.
    self._validate_task_dependencies()
1077 
# NOTE(review): the 'def' line was missing here; the name and the
# argument-free signature are taken from the super() call below - confirm.
def validate_global_constraints(self):
    '''
    Perform validation checks that can only be done at code-generation
    time.

    :raises GenerationError: if this OMPSerial is not enclosed
        within some OpenMP parallel region.
    :raises GenerationError: if this OMPSerial is enclosed within
        any OMPSerialDirective subclass region.

    '''
    # It is only at the point of code generation that we can check for
    # correctness (given that we don't mandate the order that a user
    # can apply transformations to the code). As a Parallel Child
    # directive, we must have an OMPParallelDirective as an ancestor
    # somewhere back up the tree.
    # Also check the single region is not enclosed within another OpenMP
    # single region.
    # It could in principle be allowed for that parent to be a ParallelDo
    # directive, however I can't think of a use case that would be done
    # best in a parallel code by that pattern
    if not self.ancestor(OMPParallelDirective,
                         excluding=OMPParallelDoDirective):
        raise GenerationError(
            f"{self._text_name} must be inside an OMP parallel region but "
            f"could not find an ancestor OMPParallelDirective node")

    if self.ancestor(OMPSerialDirective):
        raise GenerationError(
            f"{self._text_name} must not be inside another OpenMP "
            f"serial region")

    super().validate_global_constraints()
1111 
1112 
# NOTE(review): the 'class OMPSingleDirective(...)' header line is missing
# at this point; the docstring and attributes below belong to that class
# body - restore the header when re-assembling the file.
'''
Class representing an OpenMP SINGLE directive in the PSyIR.

:param bool nowait: argument describing whether this single should have
    a nowait clause applied. Default value is False.
:param kwargs: additional keyword arguments provided to the PSyIR node.
:type kwargs: unwrapped dict.

'''
# Description of the children this node accepts, in position order.
_children_valid_format = "Schedule, [OMPNowaitClause]"
# Textual description of the node
_text_name = "OMPSingleDirective"
1126 
def __init__(self, nowait=False, **kwargs):
    '''
    Create the directive, optionally with a nowait clause.

    :param bool nowait: whether an OMPNowaitClause child should be added.
        Default value is False.
    :param kwargs: additional keyword arguments passed to the base class
        constructor.
    :type kwargs: unwrapped dict.

    '''
    self._nowait = nowait
    # Call the init method of the base class once we've stored
    # the nowait requirement
    super().__init__(**kwargs)
    if self._nowait:
        self.children.append(OMPNowaitClause())
1135 
@staticmethod
def _validate_child(position, child):
    '''
    Decides whether a given child and position are valid for this node.
    The rules are:
    1. Child 0 must always be a Schedule.
    2. Child 1 can only be a OMPNowaitClause.

    :param int position: the position to be validated.
    :param child: a child to be validated.
    :type child: :py:class:`psyclone.psyir.nodes.Node`

    :return: whether the given child and position are valid for this node.
    :rtype: bool

    '''
    if position == 0:
        return isinstance(child, Schedule)
    if position == 1:
        return isinstance(child, OMPNowaitClause)
    # No other position may hold a child.
    return False
1157 
@property
def nowait(self):
    '''
    :returns: whether the nowait clause is specified for this directive.
    :rtype: bool

    '''
    return self._nowait
1166 
def gen_code(self, parent):
    '''Generate the fortran OMP Single Directive and any associated
    code

    :param parent: the parent Node in the Schedule to which to add our
        content.
    :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen`
    '''
    # Check the constraints are correct
    self.validate_global_constraints()

    # Capture the nowait section of the string if required
    nowait_string = ""
    if self._nowait:
        nowait_string = "nowait"

    parent.add(DirectiveGen(parent, "omp", "begin", "single",
                            nowait_string))

    # Generate the code for all of this node's children
    for child in self.dir_body:
        child.gen_code(parent)

    # Generate the end code for this node
    parent.add(DirectiveGen(parent, "omp", "end", "single", ""))
1192 
def begin_string(self):
    '''Returns the beginning statement of this directive, i.e.
    "omp single". The visitor is responsible for adding the
    correct directive beginning (e.g. "!$").

    :returns: the opening statement of this directive.
    :rtype: str

    '''
    return "omp single"
1203 
def end_string(self):
    '''Returns the end (or closing) statement of this directive, i.e.
    "omp end single". The visitor is responsible for adding the
    correct directive beginning (e.g. "!$").

    :returns: the end statement for this directive.
    :rtype: str

    '''
    return "omp end single"
1214 
1215 
# NOTE(review): the 'class OMPMasterDirective(...)' header line is missing
# at this point; the docstring and attribute below belong to that class
# body - restore the header when re-assembling the file.
'''
Class representing an OpenMP MASTER directive in the PSyclone AST.

'''

# Textual description of the node
_text_name = "OMPMasterDirective"
1224 
def gen_code(self, parent):
    '''Generate the Fortran OMP Master Directive and any associated
    code

    :param parent: the parent Node in the Schedule to which to add our
        content.
    :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen`
    '''
    # Check the constraints are correct
    self.validate_global_constraints()

    parent.add(DirectiveGen(parent, "omp", "begin", "master", ""))

    # Generate the code for all of this node's children
    for child in self.children:
        child.gen_code(parent)

    # Generate the end code for this node
    parent.add(DirectiveGen(parent, "omp", "end", "master", ""))
1245 
def begin_string(self):
    '''Returns the beginning statement of this directive, i.e.
    "omp master". The visitor is responsible for adding the
    correct directive beginning (e.g. "!$").

    :returns: the opening statement of this directive.
    :rtype: str

    '''
    return "omp master"
1256 
def end_string(self):
    '''Returns the end (or closing) statement of this directive, i.e.
    "omp end master". The visitor is responsible for adding the
    correct directive beginning (e.g. "!$").

    :returns: the end statement for this directive.
    :rtype: str

    '''
    return "omp end master"
1267 
1268 
# NOTE(review): the 'class OMPParallelDirective(...)' header line is missing
# at this point; the docstring and attribute below belong to that class
# body - restore the header when re-assembling the file.
''' Class representing an OpenMP Parallel directive.
'''

# Description of the children this node accepts, in position order. The
# fourth entry names OMPFirstprivateClause, the class that _validate_child
# accepts at position 3.
_children_valid_format = ("Schedule, OMPDefaultClause, OMPPrivateClause, "
                          "OMPFirstprivateClause, [OMPReductionClause]*")
1275 
@staticmethod
def create(children=None):
    '''
    Create an OMPParallelDirective.

    :param children: The child nodes of the new directive.
    :type children: List of :py:class:`psyclone.psyir.nodes.Node`

    :returns: A new OMPParallelDirective.
    :rtype: :py:class:`psyclone.psyir.nodes.OMPParallelDirective`
    '''

    instance = OMPParallelDirective(children=children)

    # An OMPParallelDirective must have 4 children.
    # Child 0 is a Schedule, created in the constructor.
    # The create function adds the other three mandatory children:
    # OMPDefaultClause, OMPPrivateClause and OMPFirstprivateClause
    instance.addchild(OMPDefaultClause(
        clause_type=OMPDefaultClause.DefaultClauseTypes.SHARED))
    instance.addchild(OMPPrivateClause())
    instance.addchild(OMPFirstprivateClause())

    return instance
1300 
@staticmethod
def _validate_child(position, child):
    '''
    Decides whether a given child and position are valid for this node.
    The rules are:
    1. Child 0 must be a Schedule.
    2. Child 1 must be an OMPDefaultClause.
    3. Child 2 must be an OMPPrivateClause.
    4. Child 3 must be an OMPFirstprivateClause.
    5. Any child at position 4 or later must be an OMPReductionClause.

    :param int position: the position to be validated.
    :param child: a child to be validated.
    :type child: :py:class:`psyclone.psyir.nodes.Node`

    :return: whether the given child and position are valid for this node.
    :rtype: bool

    '''
    if position == 0 and isinstance(child, Schedule):
        return True
    if position == 1 and isinstance(child, OMPDefaultClause):
        return True
    if position == 2 and isinstance(child, OMPPrivateClause):
        return True
    if position == 3 and isinstance(child, OMPFirstprivateClause):
        return True
    if position >= 4 and isinstance(child, OMPReductionClause):
        return True
    return False
1323 
@property
def default_clause(self):
    '''
    :returns: The OMPDefaultClause associated with this Directive.
    :rtype: :py:class:`psyclone.psyir.nodes.OMPDefaultClause`
    '''
    # The default clause is the child at position 1.
    return self.children[1]
1331 
@property
def private_clause(self):
    '''
    :returns: The current OMPPrivateClause associated with this Directive.
    :rtype: :py:class:`psyclone.psyir.nodes.OMPPrivateClause`
    '''
    # The private clause is the child at position 2.
    return self.children[2]
1339 
def gen_code(self, parent):
    '''Generate the fortran OMP Parallel Directive and any associated
    code.

    :param parent: the node in the generated AST to which to add content.
    :type parent: :py:class:`psyclone.f2pygen.BaseGen`

    :raises GenerationError: if the OpenMP directive needs some
        synchronisation mechanism to create valid code. These are not
        implemented yet.
    :raises GenerationError: if the same reduction variable name is used
        by more than one reduction in this region.
    :raises NotImplementedError: if the children of the directive body
        are not all of the same type.

    '''
    # pylint: disable=import-outside-toplevel
    from psyclone.psyGen import zero_reduction_variables

    # We're not doing nested parallelism so make sure that this
    # omp parallel region is not already within some parallel region
    self.validate_global_constraints()

    # Check that this OpenMP PARALLEL directive encloses other
    # OpenMP directives. Although it is valid OpenMP if it doesn't,
    # this almost certainly indicates a user error.
    self._encloses_omp_directive()

    # Generate the private and firstprivate clauses
    private, fprivate, need_sync = self.infer_sharing_attributes()
    private_clause = OMPPrivateClause.create(
        sorted(private, key=lambda x: x.name))
    fprivate_clause = OMPFirstprivateClause.create(
        sorted(fprivate, key=lambda x: x.name))
    if need_sync:
        raise GenerationError(
            f"OMPParallelDirective.gen_code() does not support symbols "
            f"that need synchronisation, but found: "
            f"{[x.name for x in need_sync]}")

    reprod_red_call_list = self.reductions(reprod=True)
    if reprod_red_call_list:
        # we will use a private thread index variable
        thread_idx = self.scope.symbol_table.\
            lookup_with_tag("omp_thread_index")
        private_clause.addchild(Reference(thread_idx))
        # From here on only the name of the symbol is needed
        thread_idx = thread_idx.name
        # declare the variable
        parent.add(DeclGen(parent, datatype="integer",
                           entity_decls=[thread_idx]))

    calls = self.reductions()

    # first check whether we have more than one reduction with the same
    # name in this Schedule. If so, raise an error as this is not
    # supported for a parallel region.
    names = []
    for call in calls:
        name = call.reduction_arg.name
        if name in names:
            raise GenerationError(
                f"Reduction variables can only be used once in an invoke. "
                f"'{name}' is used multiple times, please use a different "
                f"reduction variable")
        names.append(name)

    zero_reduction_variables(calls, parent)

    # pylint: disable=protected-access
    clauses_str = self.default_clause._clause_string
    # pylint: enable=protected-access

    private_list = [child.symbol.name for child in private_clause.children]
    if private_list:
        clauses_str += ", private(" + ",".join(private_list) + ")"
    fp_list = [child.symbol.name for child in fprivate_clause.children]
    if fp_list:
        clauses_str += ", firstprivate(" + ",".join(fp_list) + ")"
    parent.add(DirectiveGen(parent, "omp", "begin", "parallel",
                            f"{clauses_str}"))

    if reprod_red_call_list:
        # add in a local thread index
        parent.add(UseGen(parent, name="omp_lib", only=True,
                          funcnames=["omp_get_thread_num"]))
        parent.add(AssignGen(parent, lhs=thread_idx,
                             rhs="omp_get_thread_num()+1"))

    # Every child of the body must have the same type as the first one.
    first_type = type(self.dir_body[0])
    for child in self.dir_body.children:
        if first_type != type(child):
            raise NotImplementedError("Cannot correctly generate code"
                                      " for an OpenMP parallel region"
                                      " containing children of "
                                      "different types")
        child.gen_code(parent)

    parent.add(DirectiveGen(parent, "omp", "end", "parallel", ""))

    if reprod_red_call_list:
        parent.add(CommentGen(parent, ""))
        parent.add(CommentGen(parent, " sum the partial results "
                              "sequentially"))
        parent.add(CommentGen(parent, ""))
        for call in reprod_red_call_list:
            call.reduction_sum_loop(parent)

    self.gen_post_region_code(parent)
1444 
# NOTE(review): the 'def' line was missing here; the name is inferred from
# the docstring and from the identically named call made on each child
# below - confirm against the original file.
def lower_to_language_level(self):
    '''
    In-place construction of clauses as PSyIR constructs.
    At the higher level these clauses rely on dynamic variable dependence
    logic to decide what is private and what is shared, so we use this
    lowering step to find out which References are private, and place them
    explicitly in the lower-level tree to be processed by the backend
    visitor.

    :returns: the lowered version of this node.
    :rtype: :py:class:`psyclone.psyir.node.Node`

    :raises GenerationError: if the OpenMP directive needs some
        synchronisation mechanism to create valid code. These are not
        implemented yet.
    '''
    # Keep the first two children and compute the rest using the current
    # state of the node/tree (lowering it first in case new symbols are
    # created)
    self._children = self._children[:2]
    for child in self.children:
        child.lower_to_language_level()

    # Create data sharing clauses (order alphabetically to make generation
    # reproducible)
    private, fprivate, need_sync = self.infer_sharing_attributes()
    private_clause = OMPPrivateClause.create(
        sorted(private, key=lambda x: x.name))
    fprivate_clause = OMPFirstprivateClause.create(
        sorted(fprivate, key=lambda x: x.name))
    # Check all of the need_sync nodes are synchronized in children.
    sync_clauses = self.walk(OMPDependClause)
    for sym in need_sync:
        for clause in sync_clauses:
            # Needs to be an out depend clause to synchronize
            if clause.operand == "in":
                continue
            # Check if the symbol is in this depend clause.
            if sym.name in [child.symbol.name for child in
                            clause.children]:
                break
        else:
            # No non-"in" depend clause mentions this symbol.
            raise GenerationError(
                f"Lowering '{type(self).__name__}' does not support "
                f"symbols that need synchronisation unless they are "
                f"in a depend clause, but found: "
                f"'{sym.name}' which is not in a depend clause.")

    self.addchild(private_clause)
    self.addchild(fprivate_clause)
    return self
1500 
def begin_string(self):
    '''Returns the beginning statement of this directive, i.e.
    "omp parallel". The visitor is responsible for adding the
    correct directive beginning (e.g. "!$").

    :returns: the opening statement of this directive.
    :rtype: str

    '''
    result = "omp parallel"
    # TODO #514: not yet working with NEMO, so commented out for now
    # if not self._reprod:
    #     result += self._reduction_string()

    return result
1516 
def end_string(self):
    '''Returns the end (or closing) statement of this directive, i.e.
    "omp end parallel". The visitor is responsible for adding the
    correct directive beginning (e.g. "!$").

    :returns: the end statement for this directive.
    :rtype: str

    '''
    return "omp end parallel"
1527 
1529  '''
1530  The PSyIR does not specify if each symbol inside an OpenMP region is
1531  private, firstprivate, shared or shared but needs synchronisation,
1532  the attributes are inferred looking at the usage of each symbol inside
1533  the parallel region.
1534 
1535  This method analyses the directive body and automatically classifies
1536  each symbol using the following rules:
1537  - All arrays are shared.
1538  - Scalars that are accessed only once are shared.
1539  - Scalars that are read-only or written outside a loop are shared.
1540  - Scalars written in multiple iterations of a loop are private, unless:
1541 
1542  * there is a write-after-read dependency in a loop iteration,
1543  in this case they are shared but need synchronisation;
1544  * they are read before in the same parallel region (but not inside
1545  the same loop iteration), in this case they are firstprivate.
1546  * they are only conditionally written in some iterations;
1547  in this case they are firstprivate.
1548 
1549  This method returns the sets of private, firstprivate, and shared but
1550  needing synchronisation symbols, all symbols not in these sets are
1551  assumed shared. How to synchronise the symbols in the third set is
1552  up to the caller of this method.
1553 
1554  :returns: three set of symbols that classify each of the symbols in
1555  the directive body as PRIVATE, FIRSTPRIVATE or SHARED NEEDING
1556  SYNCHRONISATION.
1557  :rtype: Tuple[Set(:py:class:`psyclone.psyir.symbols.Symbol`),
1558  Set(:py:class:`psyclone.psyir.symbols.Symbol`),
1559  Set(:py:class:`psyclone.psyir.symbols.Symbol`)]
1560 
1561  :raises GenerationError: if the DefaultClauseType associated with
1562  this OMPParallelDirective is not shared.
1563 
1564  '''
1565  if (self.default_clausedefault_clause.clause_type !=
1566  OMPDefaultClause.DefaultClauseTypes.SHARED):
1567  raise GenerationError("OMPParallelClause cannot correctly generate"
1568  " the private clause when its default "
1569  "data sharing attribute in its default "
1570  "clause is not 'shared'.")
1571 
1572  # TODO #598: Improve the handling of scalar variables, there are
1573  # remaining issues when we have accesses after the parallel region
1574  # of variables that we currently declare as private. This should be
1575  # lastprivate.
1576  # e.g:
1577  # !$omp parallel do <- will set private(ji, my_index)
1578  # do ji = 1, jpk
1579  # my_index = ji+1
1580  # array(my_index) = 2
1581  # enddo
1582  # #end do
1583  # call func(my_index) <- my_index has not been updated
1584 
1585  private = set()
1586  fprivate = set()
1587  need_sync = set()
1588 
1589  # Determine variables that must be private, firstprivate or need_sync
1590  var_accesses = VariablesAccessInfo()
1591  self.reference_accessesreference_accesses(var_accesses)
1592  for signature in var_accesses.all_signatures:
1593  accesses = var_accesses[signature].all_accesses
1594  # Ignore variables that have indices, we only look at scalars
1595  if accesses[0].is_array():
1596  continue
1597 
1598  # If a variable is only accessed once, it is either an error
1599  # or a shared variable - anyway it is not private
1600  if len(accesses) == 1:
1601  continue
1602 
1603  # TODO #598: If we only have writes, it must be need_sync:
1604  # do ji = 1, jpk
1605  # if ji=3:
1606  # found = .true.
1607  # Or lastprivate in order to maintain the serial semantics
1608  # do ji = 1, jpk
1609  # found = ji
1610 
1611  # We consider private variables as being the ones that are written
1612  # in every iteration of a loop.
1613  # If one such scalar is potentially read before it is written, it
1614  # will be considered firstprivate.
1615  has_been_read = False
1616  last_read_position = 0
1617  for access in accesses:
1618  if access.access_type == AccessType.READ:
1619  has_been_read = True
1620  last_read_position = access.node.abs_position
1621 
1622  if access.access_type == AccessType.WRITE:
1623 
1624  # Check if the write access is outside a loop. In this case
1625  # it will be marked as shared. This is done because it is
1626  # likely to be re-used later. e.g:
1627  # !$omp parallel
1628  # jpk = 100
1629  # !omp do
1630  # do ji = 1, jpk
1631  loop_ancestor = access.node.ancestor(
1632  (Loop, WhileLoop),
1633  limit=self,
1634  include_self=True)
1635  if not loop_ancestor:
1636  # If we find it at least once outside a loop we keep it
1637  # as shared
1638  break
1639 
1640  # Otherwise, the assignment to this variable is inside a
1641  # loop (and it will be repeated for each iteration), so
1642  # we declare it as private or need_synch
1643  name = signature.var_name
1644  # TODO #2094: var_name only captures the top-level
1645  # component in the derived type accessor. If the attributes
1646  # only apply to a sub-component, this won't be captured
1647  # appropriately.
1648  symbol = access.node.scope.symbol_table.lookup(name)
1649 
1650  # If it has been read before we have to check if ...
1651  if has_been_read:
1652  loop_pos = loop_ancestor.loop_body.abs_position
1653  if last_read_position < loop_pos:
1654  # .. it was before the loop, so it is fprivate
1655  fprivate.add(symbol)
1656  else:
1657  # or inside the loop, in which case it needs sync
1658  need_sync.add(symbol)
1659  break
1660 
1661  # If the write is not guaranteed, we make it firstprivate
1662  # so that in the case that the write doesn't happen we keep
1663  # the original value
1664  conditional_write = access.node.ancestor(
1665  IfBlock,
1666  limit=loop_ancestor,
1667  include_self=True)
1668  if conditional_write:
1669  fprivate.add(symbol)
1670  break
1671 
1672  # Already found the first write and decided if it is
1673  # shared, private or firstprivate. We can stop looking.
1674  private.add(symbol)
1675  break
1676 
1677  return private, fprivate, need_sync
1678 
1680  '''
1681  Perform validation checks that can only be done at code-generation
1682  time.
1683 
1684  :raises GenerationError: if this directive is nested inside another \
1685  OpenMP parallel region.
1686  '''
1687  if self.ancestorancestor(OMPParallelDirective) is not None:
1688  raise GenerationError("Cannot nest OpenMP parallel regions.")
1689  self._encloses_omp_directive_encloses_omp_directive()
1690 
def _encloses_omp_directive(self):
    '''
    Check whether this parallel region contains other OpenMP directives.
    While it doesn't have to (in order to be valid OpenMP), an absence of
    directives is likely to be an error on the part of the user.

    At present the result of the search is not acted upon: an empty
    region is silently accepted (see the TODO below).

    '''
    # We need to recurse down through all our children and check
    # whether any of them are an OMPRegionDirective.
    node_list = self.walk(OMPRegionDirective)
    if not node_list:
        # TODO raise a warning here so that the user can decide
        # whether or not this is OK.
        pass
        # raise GenerationError("OpenMP parallel region does not enclose "
        #                       "any OpenMP directives. This is probably "
        #                       "not what you want.")
1706 
1707 
1709  '''
1710  Class representing an OpenMP TASKLOOP directive in the PSyIR.
1711 
1712  :param grainsize: The grainsize value used to specify the grainsize \
1713  clause on this OpenMP directive. If this is None \
1714  the grainsize clause is not applied. Default \
1715  value is None.
1716  :type grainsize: int or None.
1717  :param num_tasks: The num_tasks value used to specify the num_tasks \
1718  clause on this OpenMP directive. If this is None \
1719  the num_tasks clause is not applied. Default value \
1720  is None.
1721  :type num_tasks: int or None.
1722  :param nogroup: Whether the nogroup clause should be used for this node. \
1723  Default value is False
1724  :type nogroup: bool
1725  :param kwargs: additional keyword arguments provided to the PSyIR node.
1726  :type kwargs: unwrapped dict.
1727 
1728  :raises GenerationError: if this OMPTaskloopDirective has both \
1729  a grainsize and num_tasks value \
1730  specified.
1731  '''
1732  # This specification respects the mutual exclusion of OMPGrainsizeClause
1733  # and OMPNumTasksClause, but adds an additional ordering requirement.
1734  # Other specifications to soften the ordering requirement are possible,
1735  # but need additional checks in the global constraints instead.
1736  _children_valid_format = ("Schedule, [OMPGrainsizeClause | "
1737  "OMPNumTasksClause], [OMPNogroupClause]")
1738 
def __init__(self, grainsize=None, num_tasks=None, nogroup=False,
             **kwargs):
    '''
    Record the requested clause settings and create the corresponding
    clause children.

    :param grainsize: value for the grainsize clause, or None to omit it.
    :type grainsize: int or None
    :param num_tasks: value for the num_tasks clause, or None to omit it.
    :type num_tasks: int or None
    :param bool nogroup: whether to add the nogroup clause.
    :param kwargs: additional keyword arguments provided to the PSyIR node.
    :type kwargs: unwrapped dict.

    :raises GenerationError: if both grainsize and num_tasks are given.

    '''
    # These remain primarily for the gen_code interface
    self._grainsize = grainsize
    self._num_tasks = num_tasks
    self._nogroup = nogroup
    # The two clauses are mutually exclusive, so refuse the combination
    # before any node state is built.
    if self._grainsize is not None and self._num_tasks is not None:
        raise GenerationError(
            "OMPTaskloopDirective must not have both grainsize and "
            "numtasks clauses specified.")
    super().__init__(**kwargs)
    # Clause children are appended after whatever children the parent
    # constructor has set up, in the order required by
    # _children_valid_format.
    if self._grainsize is not None:
        child = [Literal(f"{grainsize}", INTEGER_TYPE)]
        self._children.append(OMPGrainsizeClause(children=child))
    if self._num_tasks is not None:
        child = [Literal(f"{num_tasks}", INTEGER_TYPE)]
        self._children.append(OMPNumTasksClause(children=child))
    if self._nogroup:
        self._children.append(OMPNogroupClause())
1758 
@staticmethod
def _validate_child(position, child):
    '''
    Report whether ``child`` is an acceptable child at ``position``.

    The accepted layout is:
      * position 0: a Schedule;
      * position 1: an OMPGrainsizeClause, an OMPNumTasksClause or an
        OMPNogroupClause;
      * position 2: an OMPNogroupClause.

    No other position is valid.

    :param int position: the position to be validated.
    :param child: a child to be validated.
    :type child: :py:class:`psyclone.psyir.nodes.Node`

    :return: whether the given child and position are valid for this node.
    :rtype: bool

    '''
    # Pick the class (or classes) allowed at this position, bailing out
    # for any position beyond the three that are supported.
    if position == 0:
        accepted = Schedule
    elif position == 1:
        accepted = (OMPGrainsizeClause, OMPNumTasksClause,
                    OMPNogroupClause)
    elif position == 2:
        accepted = OMPNogroupClause
    else:
        return False
    return isinstance(child, accepted)
1787 
@property
def nogroup(self):
    '''
    :returns: the nogroup clause status of this node.
    :rtype: bool
    '''
    return self._nogroup
1795 
1797  '''
1798  Perform validation checks that can only be done at code-generation
1799  time.
1800 
1801  :raises GenerationError: if this OMPTaskloopDirective is not \
1802  enclosed within an OpenMP serial region.
1803  :raises GenerationError: if this OMPTaskloopDirective has two
1804  Nogroup clauses as children.
1805  '''
1806  # It is only at the point of code generation that we can check for
1807  # correctness (given that we don't mandate the order that a user
1808  # can apply transformations to the code). As a taskloop directive, we
1809  # must have an OMPSerialDirective as an ancestor back up the tree.
1810  if not self.ancestorancestor(OMPSerialDirective):
1811  raise GenerationError(
1812  "OMPTaskloopDirective must be inside an OMP Serial region "
1813  "but could not find an ancestor node")
1814 
1815  # Check children are well formed.
1816  # _validate_child will ensure position 0 and 1 are valid.
1817  if len(self._children_children) == 3 and isinstance(self._children_children[1],
1818  OMPNogroupClause):
1819  raise GenerationError(
1820  "OMPTaskloopDirective has two Nogroup clauses as children "
1821  "which is not allowed.")
1822 
1823  super().validate_global_constraints()
1824 
def gen_code(self, parent):
    '''
    Generate the f2pygen AST entries in the Schedule for this OpenMP
    taskloop directive.

    :param parent: the parent Node in the Schedule to which to add our
        content.
    :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen`

    :raises GenerationError: if validate_global_constraints() rejects
        this directive.

    '''
    self.validate_global_constraints()

    # Find the specified clauses. These come from the values stored at
    # construction time rather than from the clause children.
    clause_list = []
    if self._grainsize is not None:
        clause_list.append(f"grainsize({self._grainsize})")
    if self._num_tasks is not None:
        clause_list.append(f"num_tasks({self._num_tasks})")
    if self._nogroup:
        clause_list.append("nogroup")
    # Generate the string containing the required clauses
    extra_clauses = ", ".join(clause_list)

    parent.add(DirectiveGen(parent, "omp", "begin", "taskloop",
                            extra_clauses))

    self.dir_body.gen_code(parent)

    # make sure the directive occurs straight after the loop body
    position = parent.previous_loop()
    parent.add(DirectiveGen(parent, "omp", "end", "taskloop", ""),
               position=["after", position])
1861 
def begin_string(self):
    '''
    Build the opening statement of this directive ("omp taskloop").
    The directive sentinel (e.g. "!$") is not included; adding it is
    left to the visitor.

    :returns: the beginning statement for this directive.
    :rtype: str

    '''
    opening = "omp taskloop"
    return opening
1872 
def end_string(self):
    '''
    Build the closing statement of this directive ("omp end taskloop").
    The directive sentinel (e.g. "!$") is not included; adding it is
    left to the visitor.

    :returns: the end statement for this directive.
    :rtype: str

    '''
    closing = "omp end taskloop"
    return closing
1883 
1884 
1886  '''
1887  Class representing an OpenMP DO directive in the PSyIR.
1888 
1889  :param str omp_schedule: the OpenMP schedule to use (defaults to
1890  "none" which means it is implementation dependent).
1891  :param Optional[int] collapse: optional number of nested loops to \
1892  collapse into a single iteration space to parallelise. Defaults to \
1893  None.
1894  :param Optional[bool] reprod: whether or not to generate code for \
1895  run-reproducible OpenMP reductions (if not specified the value is \
1896  provided by the PSyclone Config file).
1897  :param kwargs: additional keyword arguments provided to the PSyIR node.
1898  :type kwargs: unwrapped dict.
1899 
1900  '''
1901  _directive_string = "do"
1902 
def __init__(self, omp_schedule="none", collapse=None, reprod=None,
             **kwargs):
    '''
    :param str omp_schedule: the OpenMP schedule to use ("none" by
        default).
    :param Optional[int] collapse: optional number of nested loops to
        collapse. Validated by the ``collapse`` setter.
    :param Optional[bool] reprod: whether to generate run-reproducible
        reductions. When None the value is read from the Config object.
    :param kwargs: additional keyword arguments provided to the PSyIR node.
    :type kwargs: unwrapped dict.

    '''
    super().__init__(**kwargs)
    # Fall back to the configured default only when the caller expressed
    # no preference; an explicit False must be honoured.
    if reprod is None:
        self._reprod = Config.get().reproducible_reductions
    else:
        self._reprod = reprod

    # Stored directly, i.e. without going through the omp_schedule setter.
    self._omp_schedule = omp_schedule
    self._collapse = None
    self.collapse = collapse  # Use setter with error checking
1915 
def __eq__(self, other):
    '''
    Checks whether two nodes are equal. Two OMPDoDirective nodes are equal
    if they have the same schedule, the same reproducible reduction
    option, the same collapse value, and the inherited equality is True.

    :param object other: the object to check equality to.

    :returns: whether other is equal to self.
    :rtype: bool
    '''
    # Each comparison is chained with 'and' so that attributes of
    # 'other' are only read once the inherited check has succeeded.
    is_eq = super().__eq__(other)
    is_eq = is_eq and self.omp_schedule == other.omp_schedule
    is_eq = is_eq and self.reprod == other.reprod
    is_eq = is_eq and self.collapse == other.collapse

    return is_eq
1933 
@property
def collapse(self):
    '''
    :returns: the value of the collapse clause.
    :rtype: int or NoneType
    '''
    return self._collapse

@collapse.setter
def collapse(self, value):
    '''
    TODO #1648: Note that gen_code ignores the collapse clause but the
    generated code is still valid. Since gen_code is going to be removed
    and it is only used for LFRic (which does not support GPU offloading
    that gets improved with the collapse clause) it will not be supported.

    :param value: optional number of nested loops to collapse into a
        single iteration space to parallelise. Defaults to None.
    :type value: int or NoneType.

    :raises TypeError: if the collapse value given is not an integer
        or NoneType.
    :raises ValueError: if the collapse integer given is not positive.

    '''
    # None means "no collapse clause" and needs no further checking.
    if value is not None:
        if not isinstance(value, int):
            raise TypeError(
                f"The {type(self).__name__} collapse clause must be a "
                f"positive integer or None, but value '{value}' has been "
                f"given.")
        if value <= 0:
            raise ValueError(
                f"The {type(self).__name__} collapse clause must be a "
                f"positive integer or None, but value '{value}' has been "
                f"given.")

    self._collapse = value
1970 
def node_str(self, colour=True):
    '''
    Returns the name of this node with (optional) control codes
    to generate coloured output in a terminal that supports it.

    The name is followed by a bracketed, comma-separated list containing
    the schedule (when it is not "none"), the reprod setting (when the
    node has reductions) and the collapse value (when greater than 1).

    :param bool colour: whether or not to include colour control codes.

    :returns: description of this node, possibly coloured.
    :rtype: str
    '''
    parts = []
    if self.omp_schedule != "none":
        parts.append(f"omp_schedule={self.omp_schedule}")
    if self.reductions():
        parts.append(f"reprod={self._reprod}")
    if self._collapse and self._collapse > 1:
        parts.append(f"collapse={self._collapse}")
    return f"{self.coloured_name(colour)}[{','.join(parts)}]"
1989 
def _reduction_string(self):
    '''
    Build the reduction clauses for every valid reduction mode.

    :returns: the OMP reduction information as a comma-separated list of
        "reduction(<operator>:<variable>)" clauses, or an empty string
        if there are no reductions.
    :rtype: str
    '''
    # The accumulator must live outside the loop over reduction types:
    # otherwise clauses gathered for earlier types are discarded and the
    # name is undefined when there are no reduction types at all.
    parts = []
    for reduction_type in AccessType.get_valid_reduction_modes():
        for reduction in self._get_reductions_list(reduction_type):
            # The operator is looked up only when a reduction of this
            # type actually exists.
            parts.append(f"reduction("
                         f"{OMP_OPERATOR_MAPPING[reduction_type]}:"
                         f"{reduction})")
    return ", ".join(parts)
2003 
@property
def omp_schedule(self):
    '''
    :returns: the omp_schedule for this object.
    :rtype: str
    '''
    return self._omp_schedule

@omp_schedule.setter
def omp_schedule(self, value):
    '''
    :param str value: the omp_schedule for this object.

    :raises TypeError: if the provided omp_schedule is not a str.
    :raises TypeError: if the provided omp_schedule is not a valid
        schedule string.
    '''
    if not isinstance(value, str):
        raise TypeError(
            f"{type(self).__name__} omp_schedule should be a str "
            f"but found '{type(value).__name__}'.")
    # Only the text before the first comma is checked against the list
    # of valid schedules, and the comparison is case-insensitive.
    if (value.split(',')[0].lower() not in
            OMPScheduleClause.VALID_OMP_SCHEDULES):
        raise TypeError(
            f"{type(self).__name__} omp_schedule should be one of "
            f"{OMPScheduleClause.VALID_OMP_SCHEDULES} but found "
            f"'{value}'.")
    self._omp_schedule = value
2031 
@property
def reprod(self):
    '''
    :returns: whether reprod has been set for this object or not.
    '''
    return self._reprod

@reprod.setter
def reprod(self, value):
    '''
    :param bool value: enable or disable reproducible loop parallelism.
    '''
    self._reprod = value
2045 
2047  '''
2048  Perform validation checks that can only be done at code-generation
2049  time.
2050 
2051  :raises GenerationError: if this OMPDoDirective is not enclosed \
2052  within some OpenMP parallel region.
2053  '''
2054  # It is only at the point of code generation that we can check for
2055  # correctness (given that we don't mandate the order that a user
2056  # can apply transformations to the code). As a loop
2057  # directive, we must have an OMPParallelDirective as an ancestor
2058  # somewhere back up the tree.
2059  if not self.ancestorancestor(OMPParallelDirective,
2060  excluding=OMPParallelDoDirective):
2061  raise GenerationError(
2062  "OMPDoDirective must be inside an OMP parallel region but "
2063  "could not find an ancestor OMPParallelDirective node")
2064 
2065  self._validate_single_loop_validate_single_loop()
2066  self._validate_collapse_value_validate_collapse_value()
2067 
2068  super().validate_global_constraints()
2069 
def _validate_collapse_value(self):
    '''
    Checks that if there is a collapse clause, there must be as many
    immediately nested loops as the collapse value.

    :raises GenerationError: if this directive has a collapse clause but
        it doesn't have the expected number of nested Loops.
    '''
    if not self._collapse:
        return

    # 'siblings' holds the statements at the nesting level being checked.
    # Each level must consist of exactly one Loop. Examining the list,
    # rather than indexing its first element up front, means an empty
    # body is reported as a GenerationError, and nothing is indexed below
    # the last level that the collapse value requires.
    # NOTE(review): assumes each child's parent is the schedule it was
    # taken from, as the previous 'cursor.parent.children' check did.
    siblings = self.dir_body.children
    for depth in range(self._collapse):
        if len(siblings) != 1 or not isinstance(siblings[0], Loop):
            raise GenerationError(
                f"{type(self).__name__} must have as many immediately "
                f"nested loops as the collapse clause specifies but "
                f"'{self}' has a collapse={self._collapse} and the "
                f"nested body at depth {depth} cannot be "
                f"collapsed.")
        siblings = siblings[0].loop_body.children
2090 
def _validate_single_loop(self):
    '''
    Checks that this directive is only applied to a single Loop node.

    :raises GenerationError: if the body of this directive does not have
        exactly one child.
    :raises GenerationError: if the child of this directive is not a Loop.

    '''
    if len(self.dir_body.children) != 1:
        raise GenerationError(
            f"An {type(self).__name__} can only be applied to a single "
            f"loop but this Node has {len(self.dir_body.children)} "
            f"children: {self.dir_body.children}")

    # Exactly one child is present from here on, so index 0 is safe.
    if not isinstance(self.dir_body[0], Loop):
        raise GenerationError(
            f"An {type(self).__name__} can only be applied to a loop but "
            f"this Node has a child of type "
            f"'{type(self.dir_body[0]).__name__}'")
2110 
def gen_code(self, parent):
    '''
    Generate the f2pygen AST entries in the Schedule for this OpenMP do
    directive.

    TODO #1648: Note that gen_code ignores the collapse clause but the
    generated code is still valid. Since gen_code is going to be removed
    and it is only used for LFRic (which does not support GPU offloading
    that gets improved with the collapse clause) it will not be supported.

    :param parent: the parent Node in the Schedule to which to add our
        content.
    :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen`

    :raises GenerationError: if validate_global_constraints() rejects
        this directive.

    '''
    self.validate_global_constraints()

    parts = []

    if self.omp_schedule != "none":
        parts.append(f"schedule({self.omp_schedule})")

    # Reduction clauses are only emitted when reproducible reductions
    # are switched off.
    if not self._reprod:
        red_str = self._reduction_string()
        if red_str:
            parts.append(red_str)

    # As we're a loop we don't specify the scope
    # of any variables so we don't have to generate the
    # list of private variables
    options = ", ".join(parts)
    parent.add(DirectiveGen(parent, "omp", "begin", "do", options))

    for child in self.children:
        child.gen_code(parent)

    # make sure the directive occurs straight after the loop body
    position = parent.previous_loop()
    parent.add(DirectiveGen(parent, "omp", "end", "do", ""),
               position=["after", position])
2153 
def begin_string(self):
    '''Returns the beginning statement of this directive, i.e.
    "omp do ...". The visitor is responsible for adding the
    correct directive beginning (e.g. "!$").

    The schedule clause is included unless the schedule is "none", and
    the collapse clause is included when a collapse value is set.

    :returns: the beginning statement for this directive.
    :rtype: str

    '''
    string = f"omp {self._directive_string}"
    if self.omp_schedule != "none":
        string += f" schedule({self.omp_schedule})"
    if self._collapse:
        string += f" collapse({self._collapse})"
    return string
2169 
def end_string(self):
    '''
    Build the closing statement of this directive: "omp end " followed
    by this class's directive string (e.g. "omp end do"). The directive
    sentinel (e.g. "!$") is not included; adding it is left to the
    visitor.

    :returns: the end statement for this directive.
    :rtype: str

    '''
    closing = "omp end {}".format(self._directive_string)
    return closing
2180 
2181 
2183  ''' Class for the !$OMP PARALLEL DO directive. This inherits from
2184  both OMPParallelDirective (because it creates a new OpenMP
2185  thread-parallel region) and OMPDoDirective (because it
2186  causes a loop to be parallelised).
2187 
2188  :param kwargs: additional keyword arguments provided to the PSyIR node.
2189  :type kwargs: unwrapped dict.
2190  '''
2191 
2192  _children_valid_format = ("Schedule, OMPDefaultClause, OMPPrivateClause, "
2193  "OMPFirstprivateClause, OMPScheduleClause, "
2194  "[OMPReductionClause]*")
2195  _directive_string = "parallel do"
2196 
def __init__(self, **kwargs):
    '''
    :param kwargs: additional keyword arguments provided to the PSyIR node.
    :type kwargs: unwrapped dict.
    '''
    # The OMPDoDirective constructor is named explicitly rather than
    # reached through super().
    OMPDoDirective.__init__(self, **kwargs)
    # This directive is created with a default(shared) clause.
    self.addchild(OMPDefaultClause(
        clause_type=OMPDefaultClause.DefaultClauseTypes.SHARED))
2201 
@staticmethod
def _validate_child(position, child):
    '''
    Report whether ``child`` is an acceptable child at ``position``.

    Positions 0 to 4 must hold, in order, a Schedule, an
    OMPDefaultClause, an OMPPrivateClause, an OMPFirstprivateClause and
    an OMPScheduleClause. Any position from 5 onwards must hold an
    OMPReductionClause.

    :param int position: the position to be validated.
    :param child: a child to be validated.
    :type child: :py:class:`psyclone.psyir.nodes.Node`

    :return: whether the given child and position are valid for this node.
    :rtype: bool

    '''
    # Select the single class allowed at this position; positions that
    # match none of the cases (e.g. negative ones) are never valid.
    if position == 0:
        required = Schedule
    elif position == 1:
        required = OMPDefaultClause
    elif position == 2:
        required = OMPPrivateClause
    elif position == 3:
        required = OMPFirstprivateClause
    elif position == 4:
        required = OMPScheduleClause
    elif position >= 5:
        required = OMPReductionClause
    else:
        return False
    return isinstance(child, required)
2226 
def gen_code(self, parent):
    '''
    Generate the f2pygen AST entries in the Schedule for this OpenMP
    directive.

    TODO #1648: Note that gen_code ignores the collapse clause but the
    generated code is still valid. Since gen_code is going to be removed
    and it is only used for LFRic (which does not support GPU offloading
    that gets improved with the collapse clause) it will not be supported.

    :param parent: the parent Node in the Schedule to which to add our
        content.
    :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen`

    :raises GenerationError: if validate_global_constraints() rejects
        this directive.
    :raises GenerationError: if any symbol in the region is classified
        as needing synchronisation.

    '''
    # pylint: disable=import-outside-toplevel
    from psyclone.psyGen import zero_reduction_variables

    # We're not doing nested parallelism so make sure that this
    # omp parallel do is not already within some parallel region
    self.validate_global_constraints()

    calls = self.reductions()
    zero_reduction_variables(calls, parent)

    # Set default() private() and firstprivate() clauses
    # pylint: disable=protected-access
    default_str = self.children[1]._clause_string
    # pylint: enable=protected-access
    private, fprivate, need_sync = self.infer_sharing_attributes()
    # Symbols are sorted by name so that the clause contents are emitted
    # in a stable order.
    private_clause = OMPPrivateClause.create(
        sorted(private, key=lambda x: x.name))
    fprivate_clause = OMPFirstprivateClause.create(
        sorted(fprivate, key=lambda x: x.name))
    if need_sync:
        raise GenerationError(
            f"OMPParallelDoDirective.gen_code() does not support symbols "
            f"that need synchronisation, but found: "
            f"{[x.name for x in need_sync]}")

    private_str = ""
    fprivate_str = ""
    private_list = [child.symbol.name for child in private_clause.children]
    if private_list:
        private_str = "private(" + ",".join(private_list) + ")"
    fp_list = [child.symbol.name for child in fprivate_clause.children]
    if fp_list:
        fprivate_str = "firstprivate(" + ",".join(fp_list) + ")"

    # Set schedule clause
    if self._omp_schedule != "none":
        schedule_str = f"schedule({self._omp_schedule})"
    else:
        schedule_str = ""

    # Add directive to the f2pygen tree. Empty clause strings are
    # filtered out so that no stray separators are produced.
    clause_texts = [default_str, private_str, fprivate_str,
                    schedule_str, self._reduction_string()]
    parent.add(
        DirectiveGen(
            parent, "omp", "begin", "parallel do",
            ", ".join(text for text in clause_texts if text)))

    for child in self.dir_body:
        child.gen_code(parent)

    # make sure the directive occurs straight after the loop body
    position = parent.previous_loop()
    parent.add(DirectiveGen(parent, *self.end_string().split()),
               position=["after", position])

    self.gen_post_region_code(parent)
2298 
2300  '''
2301  In-place construction of clauses as PSyIR constructs.
2302  The clauses here may need to be updated if code has changed, or be
2303  added if not yet present.
2304 
2305  :returns: the lowered version of this node.
2306  :rtype: :py:class:`psyclone.psyir.node.Node`
2307 
2308  '''
2309  # Calling the super() explicitly to avoid confusion
2310  # with the multiple-inheritance
2311  OMPParallelDirective.lower_to_language_level(self)
2312  self.addchildaddchild(OMPScheduleClause(self._omp_schedule_omp_schedule))
2313  return self
2314 
def begin_string(self):
    '''Returns the beginning statement of this directive, i.e.
    "omp parallel do ...". The visitor is responsible for adding the
    correct directive beginning (e.g. "!$").

    The collapse clause is included when a collapse value is set, and any
    reduction clauses are appended after it.

    :returns: the beginning statement for this directive.
    :rtype: str

    '''
    string = f"omp {self._directive_string}"
    if self._collapse:
        string += f" collapse({self._collapse})"
    # The reduction text carries no leading separator of its own, so one
    # is added here; nothing is appended when there are no reductions.
    reduction_str = self._reduction_string()
    if reduction_str:
        string += f" {reduction_str}"
    return string
2329 
def end_string(self):
    '''
    Build the closing statement of this directive: "omp end " followed
    by this class's directive string (e.g. "omp end parallel do"). The
    directive sentinel (e.g. "!$") is not included; adding it is left to
    the visitor.

    :returns: the end statement for this directive.
    :rtype: str

    '''
    closing = "omp end {}".format(self._directive_string)
    return closing
2340 
2342  '''
2343  Perform validation checks that can only be done at code-generation
2344  time.
2345 
2346  '''
2347  OMPParallelDirective.validate_global_constraints(self)
2348 
2349  self._validate_single_loop_validate_single_loop()
2350  self._validate_collapse_value_validate_collapse_value()
2351 
2352 
2354  ''' Class representing the OMP teams distribute parallel do directive. '''
2355  _directive_string = "teams distribute parallel do"
2356 
2357 
2359  ''' Class for the !$OMP TARGET directive that offloads the code contained
2360  in its region into an accelerator device. '''
2361 
def begin_string(self):
    '''
    Build the opening statement of this directive ("omp target").
    The directive sentinel (e.g. "!$") is not included; adding it is
    left to the visitor.

    :returns: the opening statement of this directive.
    :rtype: str

    '''
    opening = "omp target"
    return opening
2372 
def end_string(self):
    '''
    Build the closing statement of this directive ("omp end target").
    The directive sentinel (e.g. "!$") is not included; adding it is
    left to the visitor.

    :returns: the end statement for this directive.
    :rtype: str

    '''
    closing = "omp end target"
    return closing
2383 
2384 
2386  ''' Class for the !$OMP LOOP directive that specifies that the iterations
2387  of the associated loops may execute concurrently.
2388 
2389  :param Optional[int] collapse: optional number of nested loops to \
2390  collapse into a single iteration space to parallelise. Defaults \
2391  to None.
2392  :param kwargs: additional keyword arguments provided to the PSyIR node.
2393  :type kwargs: unwrapped dict.
2394  '''
2395 
def __init__(self, collapse=None, **kwargs):
    '''
    :param Optional[int] collapse: optional number of nested loops to
        collapse. Validated by the ``collapse`` setter.
    :param kwargs: additional keyword arguments provided to the PSyIR node.
    :type kwargs: unwrapped dict.
    '''
    super().__init__(**kwargs)
    self._collapse = None
    self.collapse = collapse  # Use setter with error checking
2400 
def __eq__(self, other):
    '''
    Checks whether two nodes are equal. Two OMPLoopDirective nodes are
    equal if they have the same collapse status and the inherited
    equality is true.

    :param object other: the object to check equality to.

    :returns: whether other is equal to self.
    :rtype: bool
    '''
    # 'other.collapse' is only read once the inherited check has
    # succeeded.
    is_eq = super().__eq__(other)
    is_eq = is_eq and self.collapse == other.collapse

    return is_eq
2416 
@property
def collapse(self):
    '''
    :returns: the value of the collapse clause.
    :rtype: int or NoneType
    '''
    return self._collapse

@collapse.setter
def collapse(self, value):
    '''
    TODO #1648: Note that gen_code ignores the collapse clause but the
    generated code is still valid. Since gen_code is going to be removed
    and it is only used for LFRic (which does not support GPU offloading
    that gets improved with the collapse clause) it will not be supported.

    :param value: optional number of nested loops to collapse into a
        single iteration space to parallelise. Defaults to None.
    :type value: int or NoneType.

    :raises TypeError: if the collapse value given is not an integer
        or NoneType.
    :raises ValueError: if the collapse integer given is not positive.

    '''
    # None means "no collapse clause" and needs no further checking.
    if value is not None:
        if not isinstance(value, int):
            raise TypeError(
                f"The OMPLoopDirective collapse clause must be a positive "
                f"integer or None, but value '{value}' has been given.")
        if value <= 0:
            raise ValueError(
                f"The OMPLoopDirective collapse clause must be a positive "
                f"integer or None, but value '{value}' has been given.")

    self._collapse = value
2453 
def node_str(self, colour=True):
    ''' Returns the name of this node with (optional) control codes
    to generate coloured output in a terminal that supports it.

    The name is followed by "[collapse=<n>]" when a collapse value is
    set and by "[]" otherwise.

    :param bool colour: whether or not to include colour control codes.

    :returns: description of this node, possibly coloured.
    :rtype: str
    '''
    text = self.coloured_name(colour)
    if self._collapse:
        text += f"[collapse={self._collapse}]"
    else:
        text += "[]"
    return text
2469 
def begin_string(self):
    ''' Returns the beginning statement of this directive, i.e. "omp loop",
    followed by a "collapse(N)" clause when a collapse value is set.
    The visitor is responsible for adding the correct directive beginning
    (e.g. "!$").

    :returns: the opening statement of this directive.
    :rtype: str

    '''
    string = "omp loop"
    # Read the clause through the public property so that the test and
    # the emitted value always come from the same place.
    if self.collapse:
        string += f" collapse({self.collapse})"
    return string
2483 
def end_string(self):
    '''Returns the closing statement of this directive, which is always
    "omp end loop". The directive prefix (e.g. "!$") is not included;
    the visitor is responsible for adding it.

    :returns: the end statement for this directive.
    :rtype: str

    '''
    closing_statement = "omp end loop"
    return closing_statement
2494 
def validate_global_constraints(self):
    ''' Perform validation of those global constraints that can only be
    done at code-generation time.

    :raises GenerationError: if this OMPLoopDirective has more than one \
        child in its associated schedule.
    :raises GenerationError: if the schedule associated with this \
        OMPLoopDirective does not contain a Loop.
    :raises GenerationError: this directive must be inside a omp target \
        or parallel region.
    :raises GenerationError: if this OMPLoopDirective has a collapse \
        clause but it doesn't have the expected number of nested Loops.

    '''
    body = self.dir_body.children
    if len(body) != 1:
        raise GenerationError(
            f"OMPLoopDirective must have exactly one child in its "
            f"associated schedule but found {body}.")

    if not isinstance(body[0], Loop):
        raise GenerationError(
            f"OMPLoopDirective must have a Loop as child of its associated"
            f" schedule but found '{body[0]}'.")

    if not self.ancestor((OMPTargetDirective, OMPParallelDirective)):
        # Also omp teams or omp threads regions but these are not supported
        # in the PSyIR
        raise GenerationError(
            f"OMPLoopDirective must be inside a OMPTargetDirective or a "
            f"OMPParallelDirective, but '{self}' is not.")

    # If there is a collapse clause, there must be as many immediately
    # nested loops as the collapse value
    if self.collapse:
        cursor = body[0]
        for depth in range(self.collapse):
            if not isinstance(cursor, Loop):
                raise GenerationError(
                    f"OMPLoopDirective must have as many immediately "
                    f"nested loops as the collapse clause specifies but "
                    f"'{self}' has a collapse={self.collapse} and the "
                    f"nested statement at depth {depth} is a "
                    f"{type(cursor).__name__} rather than a Loop.")
            # Descend to the first statement of this loop's body. An
            # empty body yields None so that a missing nested loop is
            # reported as a GenerationError on the next iteration rather
            # than as a bare IndexError here.
            nested = cursor.loop_body.children
            cursor = nested[0] if nested else None

    super().validate_global_constraints()
2541 
2542 
# NOTE(review): the class header line is missing from this listing; the
# OMPRegionDirective base class is assumed - confirm against the original.
class OMPAtomicDirective(OMPRegionDirective):
    '''
    OpenMP directive to represent that the memory accesses in the associated
    assignment must be performed atomically.
    Note that the standard supports blocks with 2 assignments but this is
    currently unsupported in the PSyIR.

    '''
    def begin_string(self):
        '''
        :returns: the opening string statement of this directive.
        :rtype: str

        '''
        return "omp atomic"

    def end_string(self):
        '''
        :returns: the ending string statement of this directive.
        :rtype: str

        '''
        return "omp end atomic"

    @staticmethod
    def is_valid_atomic_statement(stmt):
        ''' Check if a given statement is a valid OpenMP atomic expression. See
        https://www.openmp.org/spec-html/5.0/openmpsu95.html

        :param stmt: a node to be validated.
        :type stmt: :py:class:`psyclone.psyir.nodes.Node`

        :returns: whether a given statement is compliant with the OpenMP
            atomic expression.
        :rtype: bool

        '''
        if not isinstance(stmt, Assignment):
            return False

        # Not all rules are checked, just that:
        # - operands are of a scalar intrinsic type
        if not isinstance(stmt.lhs.datatype, ScalarType):
            return False

        # - the top-level operator is one of: +, *, -, /, AND, OR, EQV, NEQV
        if isinstance(stmt.rhs, BinaryOperation):
            if stmt.rhs.operator not in (BinaryOperation.Operator.ADD,
                                         BinaryOperation.Operator.SUB,
                                         BinaryOperation.Operator.MUL,
                                         BinaryOperation.Operator.DIV,
                                         BinaryOperation.Operator.AND,
                                         BinaryOperation.Operator.OR,
                                         BinaryOperation.Operator.EQV,
                                         BinaryOperation.Operator.NEQV):
                return False
        # - or intrinsics: MAX, MIN, IAND, IOR, or IEOR
        if isinstance(stmt.rhs, IntrinsicCall):
            if stmt.rhs.intrinsic not in (IntrinsicCall.Intrinsic.MAX,
                                          IntrinsicCall.Intrinsic.MIN,
                                          IntrinsicCall.Intrinsic.IAND,
                                          IntrinsicCall.Intrinsic.IOR,
                                          IntrinsicCall.Intrinsic.IEOR):
                return False

        # - one of the operands should be the same as the lhs
        # NOTE(review): a rhs that is neither a BinaryOperation nor an
        # IntrinsicCall skips both checks above and is judged by this
        # membership test alone - confirm that this is intended.
        if stmt.lhs not in stmt.rhs.children:
            return False

        return True

    def validate_global_constraints(self):
        ''' Perform validation of those global constraints that can only be
        done at code-generation time.

        :raises GenerationError: if the OMPAtomicDirective associated
            statement does not conform to a valid OpenMP atomic operation.
        '''
        # Test self.children first so that dir_body is only accessed when
        # this directive has children.
        if not self.children or len(self.dir_body.children) != 1:
            raise GenerationError(
                f"Atomic directives must always have one and only one"
                f" associated statement, but found: '{self.debug_string()}'")
        stmt = self.dir_body[0]
        if not self.is_valid_atomic_statement(stmt):
            raise GenerationError(
                f"Statement '{self.children[0].debug_string()}' is not a "
                f"valid OpenMP Atomic statement.")
2630 
2631 
# NOTE(review): the class header line is missing from this listing; the
# OMPRegionDirective base class is assumed - confirm against the original.
class OMPSimdDirective(OMPRegionDirective):
    '''
    OpenMP directive to inform that the associated loop can be vectorised.

    '''
    def begin_string(self):
        '''
        :returns: the opening string statement of this directive.
        :rtype: str

        '''
        return "omp simd"

    def end_string(self):
        '''
        :returns: the ending string statement of this directive.
        :rtype: str

        '''
        return "omp end simd"

    def validate_global_constraints(self):
        ''' Perform validation of those global constraints that can only be
        done at code-generation time.

        :raises GenerationError: if the OMPSimdDirective does not contain
            precisely one loop.

        '''
        # Test self.children first so that dir_body is only accessed when
        # this directive has children.
        if (not self.children or len(self.dir_body.children) != 1 or
                not isinstance(self.dir_body[0], Loop)):
            raise GenerationError(
                f"The OMP SIMD directives must always have one and only one"
                f" associated loop, but found: '{self.debug_string()}'")
2666 
2667 
# Names exported by this module; used for automatic API documentation
# generation.
__all__ = [
    "OMPRegionDirective",
    "OMPParallelDirective",
    "OMPSingleDirective",
    "OMPMasterDirective",
    "OMPDoDirective",
    "OMPParallelDoDirective",
    "OMPSerialDirective",
    "OMPTaskloopDirective",
    "OMPTargetDirective",
    "OMPTaskwaitDirective",
    "OMPDirective",
    "OMPStandaloneDirective",
    "OMPLoopDirective",
    "OMPDeclareTargetDirective",
    "OMPAtomicDirective",
    "OMPSimdDirective",
]
def children(self, my_children)
Definition: node.py:935
def addchild(self, child, index=None)
Definition: node.py:909
def reductions(self, reprod=None)
Definition: node.py:1396
def coloured_name(self, colour=True)
Definition: node.py:453
def reference_accesses(self, var_accesses)
Definition: node.py:1461
def walk(self, my_type, stop_type=None, depth=None)
Definition: node.py:1075
def validate_global_constraints(self)
Definition: node.py:1605
def ancestor(self, my_type, excluding=None, include_self=False, limit=None, shared_with=None)
Definition: node.py:1161
def _check_dependency_pairing_valid(self, node1, node2, task1, task2)
def _compute_accesses_get_start_stop_step(self, preceding_nodes, task, symbol)
def _check_valid_overlap(self, sympy_ref1s, sympy_ref2s)
def _valid_dependence_ref_binop(self, ref1, ref2, task1, task2)
def _compute_accesses(self, ref, preceding_nodes, task)
def _valid_dependence_ranges(self, arraymixin1, arraymixin2, index)