Reference Guide  2.5.0
lfric_loop.py
1 # -----------------------------------------------------------------------------
2 # BSD 3-Clause License
3 #
4 # Copyright (c) 2017-2024, Science and Technology Facilities Council.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are met:
9 #
10 # * Redistributions of source code must retain the above copyright notice, this
11 # list of conditions and the following disclaimer.
12 #
13 # * Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
16 #
17 # * Neither the name of the copyright holder nor the names of its
18 # contributors may be used to endorse or promote products derived from
19 # this software without specific prior written permission.
20 #
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 # POSSIBILITY OF SUCH DAMAGE.
33 # -----------------------------------------------------------------------------
34 # Authors R. W. Ford, A. R. Porter and S. Siso, STFC Daresbury Lab
35 # Modified I. Kavcic, A. Coughtrie, L. Turner and O. Brunt, Met Office
36 # Modified J. Henrichs, Bureau of Meteorology
37 # Modified A. B. G. Chalk and N. Nobre, STFC Daresbury Lab
38 
39 ''' This module implements the PSyclone LFRic API by specialising the PSyLoop
40  base class from psyGen.py.
41  '''
42 
43 from psyclone.configuration import Config
44 from psyclone.core import AccessType
45 from psyclone.domain.common.psylayer import PSyLoop
46 from psyclone.domain.lfric import LFRicConstants, LFRicKern
47 from psyclone.domain.lfric.lfric_builtins import LFRicBuiltIn
48 from psyclone.domain.lfric.lfric_types import LFRicTypes
49 from psyclone.errors import GenerationError, InternalError
50 from psyclone.f2pygen import CallGen, CommentGen
51 from psyclone.psyGen import InvokeSchedule, HaloExchange
52 from psyclone.psyir.nodes import (Loop, Literal, Schedule, Reference,
53  ArrayReference, ACCRegionDirective,
54  OMPRegionDirective, Routine)
55 from psyclone.psyir.symbols import DataSymbol, INTEGER_TYPE
56 
57 
59  '''
60  The LFRic-specific PSyLoop class. This passes the LFRic-specific
61  loop information to the base class so it creates the one
62  we require. Creates LFRic-specific loop bounds when the code is
63  being generated.
64 
65  :param str loop_type: the type (iteration space) of this loop.
66  :param kwargs: additional keyword arguments provided to the PSyIR node.
67  :type kwargs: unwrapped dict.
68 
69  :raises InternalError: if an unrecognised loop_type is specified.
70 
71  '''
72  # pylint: disable=too-many-instance-attributes
73  def __init__(self, loop_type="", **kwargs):
74  const = LFRicConstants()
75  super().__init__(valid_loop_types=const.VALID_LOOP_TYPES, **kwargs)
76  self.loop_typeloop_typeloop_typeloop_typeloop_type = loop_type
77 
78  # Set our variable at initialisation as it might be required
79  # by other classes before code generation. A 'null' loop does not
80  # have an associated variable.
81  if self.loop_typeloop_typeloop_typeloop_typeloop_type != "null":
82 
83  if self.loop_typeloop_typeloop_typeloop_typeloop_type == "colours":
84  tag = "colours_loop_idx"
85  suggested_name = "colour"
86  elif self.loop_typeloop_typeloop_typeloop_typeloop_type == "colour":
87  tag = "cell_loop_idx"
88  suggested_name = "cell"
89  elif self.loop_typeloop_typeloop_typeloop_typeloop_type == "dof":
90  tag = "dof_loop_idx"
91  suggested_name = "df"
92  elif self.loop_typeloop_typeloop_typeloop_typeloop_type == "":
93  tag = "cell_loop_idx"
94  suggested_name = "cell"
95  else:
96  raise InternalError(
97  f"Unsupported loop type '{self.loop_type}' found when "
98  f"creating loop variable. Supported values are 'colours', "
99  f"'colour', 'dof' or '' (for cell-columns).")
100 
101  self.variablevariable = self.scope.symbol_table.find_or_create_tag(
102  tag, root_name=suggested_name, symbol_type=DataSymbol,
103  datatype=LFRicTypes("LFRicIntegerScalarDataType")())
104 
105  # Pre-initialise the Loop children # TODO: See issue #440
106  self.addchild(Literal("NOT_INITIALISED", INTEGER_TYPE,
107  parent=self)) # start
108  self.addchild(Literal("NOT_INITIALISED", INTEGER_TYPE,
109  parent=self)) # stop
110  self.addchild(Literal("1", INTEGER_TYPE, parent=self)) # step
111  self.addchild(Schedule(parent=self)) # loop body
112 
113  # At this stage we don't know what our loop bounds are
114  self._lower_bound_name_lower_bound_name = None
115  self._lower_bound_index_lower_bound_index = None
116  self._upper_bound_name_upper_bound_name = None
117  self._upper_bound_halo_depth_upper_bound_halo_depth = None
118 
120  '''In-place replacement of DSL or high-level concepts into generic
121  PSyIR constructs. This function replaces a LFRicLoop with a PSyLoop
122  and inserts the loop boundaries into the new PSyLoop, or removes
123  the loop node in case of a domain kernel. Once TODO #1731 is done
124  (which should fix the loop boundaries, which atm rely on index of
125  the loop in the schedule, i.e. can change when transformations are
126  applied), this function can likely be removed.
127 
128  :returns: the lowered version of this node.
129  :rtype: :py:class:`psyclone.psyir.node.Node`
130 
131  '''
132  if self._loop_type_loop_type_loop_type != "null":
133  # This is not a 'domain' loop (i.e. there is a real loop). First
134  # check that there isn't any validation issues with the node.
135  for child in self.loop_body.children:
136  child.validate_global_constraints()
137 
138  # Then generate the loop bounds, this needs to be done BEFORE
139  # lowering the loop body because it needs kernel information.
140  start = self.start_exprstart_expr.copy()
141  stop = self.stop_exprstop_expr.copy()
142  step = self.step_expr.copy()
143 
144  # Now we can lower the nodes in the loop body
145  for child in self.loop_body.children:
146  child.lower_to_language_level()
147 
148  # Finally create the new lowered Loop and replace the domain one
149  loop = Loop.create(self._variable, start, stop, step, [])
150  loop.loop_body._symbol_table = \
151  self.loop_body.symbol_table.shallow_copy()
152  loop.children[3] = self.loop_body.copy()
153  self.replace_with(loop)
154  lowered_node = loop
155  else:
156  # If loop_type is "null" we do not need a loop at all, just the
157  # kernel in its loop_body
158  for child in self.loop_body.children:
159  child.lower_to_language_level()
160  # TODO #1010: This restriction can be removed when also lowering
161  # the parent InvokeSchedule
162  if len(self.loop_body.children) > 1:
163  raise NotImplementedError(
164  f"Lowering LFRic domain loops that produce more than one "
165  f"children is not yet supported, but found:\n "
166  f"{self.view()}")
167  lowered_node = self.loop_body[0].detach()
168  self.replace_with(lowered_node)
169 
170  return lowered_node
171 
172  def node_str(self, colour=True):
173  ''' Creates a text summary of this loop node. We override this
174  method from the Loop class because, in Dynamo0.3, the function
175  space is now an object and we need to call orig_name on it. We
176  also include the upper loop bound as this can now be modified.
177 
178  :param bool colour: whether or not to include control codes for colour.
179 
180  :returns: text summary of this node, optionally with control codes \
181  for colour highlighting.
182  :rtype: str
183 
184  '''
185  if self._loop_type_loop_type_loop_type == "null":
186  return f"{self.coloured_name(colour)}[type='null']"
187 
188  if self._upper_bound_halo_depth_upper_bound_halo_depth:
189  upper_bound = (f"{self._upper_bound_name}"
190  f"({self._upper_bound_halo_depth})")
191  else:
192  upper_bound = self._upper_bound_name_upper_bound_name
193  return (f"{self.coloured_name(colour)}[type='{self._loop_type}', "
194  f"field_space='{self._field_space.orig_name}', "
195  f"it_space='{self.iteration_space}', "
196  f"upper_bound='{upper_bound}']")
197 
198  def load(self, kern):
199  '''
200  Load the state of this Loop using the supplied Kernel
201  object. This method is provided so that we can individually
202  construct Loop objects for a given kernel call.
203 
204  :param kern: Kernel object to use to populate state of Loop
205  :type kern: :py:class:`psyclone.domain.lfric.LFRicKern`
206 
207  :raises GenerationError: if the field updated by the kernel has an \
208  unexpected function space or if the kernel's 'operates-on' is \
209  not consistent with the loop type.
210 
211  '''
212  self._kern_kern_kern = kern
213 
214  self._field_field_field = kern.arguments.iteration_space_arg()
215  self._field_name_field_name_field_name = self._field_field_field.name
216  self._field_space_field_space_field_space = self._field_field_field.function_space
217 
218  if self.loop_typeloop_typeloop_typeloop_typeloop_type == "null" and kern.iterates_over != "domain":
219  raise GenerationError(
220  f"A LFRicLoop of type 'null' can only contain a kernel that "
221  f"operates on the 'domain' but kernel '{kern.name}' operates "
222  f"on '{kern.iterates_over}'.")
223  self._iteration_space_iteration_space_iteration_space = kern.iterates_over # cell_columns etc.
224 
225  # Loop bounds
226  self.set_lower_boundset_lower_bound("start")
227  const = LFRicConstants()
228  if isinstance(kern, LFRicBuiltIn):
229  # If the kernel is a built-in/pointwise operation
230  # then this loop must be over DoFs
231  if Config.get().api_conf("dynamo0.3").compute_annexed_dofs \
232  and Config.get().distributed_memory \
233  and not kern.is_reduction:
234  self.set_upper_boundset_upper_bound("nannexed")
235  else:
236  self.set_upper_boundset_upper_bound("ndofs")
237  else:
238  if Config.get().distributed_memory:
239  if self._field_field_field.is_operator:
240  # We always compute operators redundantly out to the L1
241  # halo
242  self.set_upper_boundset_upper_bound("cell_halo", index=1)
243  elif (self.field_spacefield_spacefield_spacefield_space.orig_name in
244  const.VALID_DISCONTINUOUS_NAMES):
245  # Iterate to ncells for all discontinuous quantities,
246  # including any_discontinuous_space
247  self.set_upper_boundset_upper_bound("ncells")
248  elif (self.field_spacefield_spacefield_spacefield_space.orig_name in
249  const.CONTINUOUS_FUNCTION_SPACES):
250  # Must iterate out to L1 halo for continuous quantities
251  # unless the only arguments that are updated all have
252  # 'GH_WRITE' access. The only time such an access is
253  # permitted for a field on a continuous space is when the
254  # kernel is implemented such that any writes to a given
255  # shared dof are guaranteed to write the same value. There
256  # is therefore no need to iterate into the L1 halo in order
257  # to get correct values for annexed dofs.
258  if not kern.all_updates_are_writes:
259  self.set_upper_boundset_upper_bound("cell_halo", index=1)
260  else:
261  self.set_upper_boundset_upper_bound("ncells")
262  elif (self.field_spacefield_spacefield_spacefield_space.orig_name in
263  const.VALID_ANY_SPACE_NAMES):
264  # We don't know whether any_space is continuous or not
265  # so we have to err on the side of caution and assume that
266  # it is. Again, if the only arguments that are updated have
267  # 'GH_WRITE' access then we can relax this condition.
268  if not kern.all_updates_are_writes:
269  self.set_upper_boundset_upper_bound("cell_halo", index=1)
270  else:
271  self.set_upper_boundset_upper_bound("ncells")
272  else:
273  raise GenerationError(
274  f"Unexpected function space found. Expecting one of "
275  f"{const.VALID_FUNCTION_SPACES} but found "
276  f"'{self.field_space.orig_name}'")
277  else: # sequential
278  self.set_upper_boundset_upper_bound("ncells")
279 
280  def set_lower_bound(self, name, index=None):
281  ''' Set the lower bounds of this loop '''
282  const = LFRicConstants()
283  if name not in const.VALID_LOOP_BOUNDS_NAMES:
284  raise GenerationError(
285  "The specified lower bound loop name is invalid")
286  if name in ["inner"] + const.HALO_ACCESS_LOOP_BOUNDS and index < 1:
287  raise GenerationError(
288  "The specified index '{index}' for this lower loop bound is "
289  "invalid")
290  self._lower_bound_name_lower_bound_name = name
291  self._lower_bound_index_lower_bound_index = index
292 
293  def set_upper_bound(self, name, index=None):
294  '''Set the upper bound of this loop
295 
296  :param name: A loop upper bound name. This should be a supported name.
297  :type name: String
298  :param index: An optional argument indicating the depth of halo
299  :type index: int
300 
301  '''
302  const = LFRicConstants()
303  if name not in const.VALID_LOOP_BOUNDS_NAMES:
304  raise GenerationError(
305  f"The specified upper loop bound name is invalid. Expected "
306  f"one of {const.VALID_LOOP_BOUNDS_NAMES} but found '{name}'")
307  if name == "start":
308  raise GenerationError("'start' is not a valid upper bound")
309  # Only halo bounds and inner may have an index. We could just
310  # test for index here and assume that index is None for other
311  # types of bounds, but checking the type of bound as well is a
312  # safer option.
313  if name in (["inner"] + const.HALO_ACCESS_LOOP_BOUNDS) and \
314  index is not None:
315  if index < 1:
316  raise GenerationError(
317  f"The specified index '{index}' for this upper loop bound "
318  f"is invalid")
319  self._upper_bound_name_upper_bound_name = name
320  self._upper_bound_halo_depth_upper_bound_halo_depth = index
321 
322  @property
323  def upper_bound_name(self):
324  ''' Returns the name of the upper loop bound '''
325  return self._upper_bound_name_upper_bound_name
326 
327  @property
329  '''Returns the index of the upper loop bound. This is None if the upper
330  bound name is not in HALO_ACCESS_LOOP_BOUNDS.
331 
332  :returns: the depth of the halo for a loops upper bound. If it \
333  is None then a depth has not been provided. The depth value is \
334  only valid when the upper-bound name is associated with a halo \
335  e.g. 'cell_halo'.
336  :rtype: int
337 
338  '''
339  return self._upper_bound_halo_depth_upper_bound_halo_depth
340 
341  def _lower_bound_fortran(self):
342  '''
343  Create the associated Fortran code for the type of lower bound.
344 
345  TODO: Issue #440. lower_bound_fortran should generate PSyIR.
346 
347  :returns: the Fortran code for the lower bound.
348  :rtype: str
349 
350  :raises GenerationError: if self._lower_bound_name is not "start" \
351  for sequential code.
352  :raises GenerationError: if self._lower_bound_name is unrecognised.
353 
354  '''
355  if not Config.get().distributed_memory and \
356  self._lower_bound_name_lower_bound_name != "start":
357  raise GenerationError(
358  f"The lower bound must be 'start' if we are sequential but "
359  f"found '{self._upper_bound_name}'")
360  if self._lower_bound_name_lower_bound_name == "start":
361  return "1"
362 
363  # the start of our space is the end of the previous space +1
364  if self._lower_bound_name_lower_bound_name == "inner":
365  prev_space_name = self._lower_bound_name_lower_bound_name
366  prev_space_index_str = str(self._lower_bound_index_lower_bound_index + 1)
367  elif self._lower_bound_name_lower_bound_name == "ncells":
368  prev_space_name = "inner"
369  prev_space_index_str = "1"
370  elif (self._lower_bound_name_lower_bound_name == "cell_halo" and
371  self._lower_bound_index_lower_bound_index == 1):
372  prev_space_name = "ncells"
373  prev_space_index_str = ""
374  elif (self._lower_bound_name_lower_bound_name == "cell_halo" and
375  self._lower_bound_index_lower_bound_index > 1):
376  prev_space_name = self._lower_bound_name_lower_bound_name
377  prev_space_index_str = str(self._lower_bound_index_lower_bound_index - 1)
378  else:
379  raise GenerationError(
380  f"Unsupported lower bound name '{self._lower_bound_name}' "
381  f"found")
382  # Use InvokeSchedule SymbolTable to share the same symbol for all
383  # Loops in the Invoke.
384  mesh_obj_name = self.ancestor(InvokeSchedule).symbol_table.\
385  find_or_create_tag("mesh").name
386  return mesh_obj_name + "%get_last_" + prev_space_name + "_cell(" \
387  + prev_space_index_str + ")+1"
388 
389  @property
390  def _mesh_name(self):
391  '''
392  :returns: the name of the mesh variable from which to get the bounds \
393  for this loop.
394  :rtype: str
395  '''
396  # We must allow for self._kern being None (as it will be for
397  # a built-in).
398  if self._kern_kern_kern and self._kern_kern_kern.is_intergrid:
399  # We have more than one mesh object to choose from and we
400  # want the coarse one because that determines the iteration
401  # space. _field_name holds the name of the argument that
402  # determines the iteration space of this kernel and that
403  # is set-up to be the one on the coarse mesh (in
404  # DynKernelArguments.iteration_space_arg()).
405  tag_name = "mesh_" + self._field_name_field_name_field_name
406  else:
407  # It's not an inter-grid kernel so there's only one mesh
408  tag_name = "mesh"
409 
410  # The symbol for the mesh will already have been added to the
411  # symbol table associated with the InvokeSchedule.
412  return self.ancestor(InvokeSchedule).symbol_table.\
413  lookup_with_tag(tag_name).name
414 
415  def _upper_bound_fortran(self):
416  ''' Create the Fortran code that gives the appropriate upper bound
417  value for this type of loop.
418 
419  TODO: Issue #440. upper_bound_fortran should generate PSyIR.
420 
421  :returns: Fortran code for the upper bound of this loop.
422  :rtype: str
423 
424  '''
425  # pylint: disable=too-many-branches, too-many-return-statements
426  # precompute halo_index as a string as we use it in more than
427  # one of the if clauses
428  halo_index = ""
429  if self._upper_bound_halo_depth_upper_bound_halo_depth:
430  halo_index = str(self._upper_bound_halo_depth_upper_bound_halo_depth)
431 
432  if self._upper_bound_name_upper_bound_name == "ncolours":
433  # Loop over colours
434  kernels = self.walk(LFRicKern)
435  if not kernels:
436  raise InternalError(
437  "Failed to find a kernel within a loop over colours.")
438  # Check that all kernels have been coloured. We can't check the
439  # number of colours since that is only known at runtime.
440  for kern in kernels:
441  if not kern.ncolours_var:
442  raise InternalError(
443  f"All kernels within a loop over colours must have "
444  f"been coloured but kernel '{kern.name}' has not")
445  return kernels[0].ncolours_var
446 
447  if self._upper_bound_name_upper_bound_name == "ncolour":
448  # Loop over cells of a particular colour when DM is disabled.
449  # We use the same, DM API as that returns sensible values even
450  # when running without MPI.
451  root_name = "last_edge_cell_all_colours"
452  if self._kern_kern_kern.is_intergrid:
453  root_name += "_" + self._field_name_field_name_field_name
454  sym = self.ancestor(
455  InvokeSchedule).symbol_table.find_or_create_tag(root_name)
456  return f"{sym.name}(colour)"
457  if self._upper_bound_name_upper_bound_name == "colour_halo":
458  # Loop over cells of a particular colour when DM is enabled. The
459  # LFRic API used here allows for colouring with redundant
460  # computation.
461  sym_tab = self.ancestor(InvokeSchedule).symbol_table
462  if halo_index:
463  # The colouring API provides a 2D array that holds the last
464  # halo cell for a given colour and halo depth.
465  depth = halo_index
466  else:
467  # If no depth is specified then we go to the full halo depth
468  depth = sym_tab.find_or_create_tag(
469  f"max_halo_depth_{self._mesh_name}").name
470  root_name = "last_halo_cell_all_colours"
471  if self._kern_kern_kern.is_intergrid:
472  root_name += "_" + self._field_name_field_name_field_name
473  sym = sym_tab.find_or_create_tag(root_name)
474  return f"{sym.name}(colour, {depth})"
475  if self._upper_bound_name_upper_bound_name in ["ndofs", "nannexed"]:
476  if Config.get().distributed_memory:
477  if self._upper_bound_name_upper_bound_name == "ndofs":
478  result = (f"{self.field.proxy_name_indexed}%"
479  f"{self.field.ref_name()}%get_last_dof_owned()")
480  else: # nannexed
481  result = (
482  f"{self.field.proxy_name_indexed}%"
483  f"{self.field.ref_name()}%get_last_dof_annexed()")
484  else:
485  result = self._kern_kern_kern.undf_name
486  return result
487  if self._upper_bound_name_upper_bound_name == "ncells":
488  if Config.get().distributed_memory:
489  result = f"{self._mesh_name}%get_last_edge_cell()"
490  else:
491  result = (f"{self.field.proxy_name_indexed}%"
492  f"{self.field.ref_name()}%get_ncell()")
493  return result
494  if self._upper_bound_name_upper_bound_name == "cell_halo":
495  if Config.get().distributed_memory:
496  return f"{self._mesh_name}%get_last_halo_cell({halo_index})"
497  raise GenerationError(
498  "'cell_halo' is not a valid loop upper bound for "
499  "sequential/shared-memory code")
500  if self._upper_bound_name_upper_bound_name == "dof_halo":
501  if Config.get().distributed_memory:
502  return (f"{self.field.proxy_name_indexed}%"
503  f"{self.field.ref_name()}%get_last_dof_halo("
504  f"{halo_index})")
505  raise GenerationError(
506  "'dof_halo' is not a valid loop upper bound for "
507  "sequential/shared-memory code")
508  if self._upper_bound_name_upper_bound_name == "inner":
509  if Config.get().distributed_memory:
510  return f"{self._mesh_name}%get_last_inner_cell({halo_index})"
511  raise GenerationError(
512  "'inner' is not a valid loop upper bound for "
513  "sequential/shared-memory code")
514  raise GenerationError(
515  f"Unsupported upper bound name '{self._upper_bound_name}' found "
516  f"in lfricloop.upper_bound_fortran()")
517 
518  def _halo_read_access(self, arg):
519  '''
520  Determines whether the supplied argument has (or might have) its
521  halo data read within this loop. Returns True if it does, or if
522  it might and False if it definitely does not.
523 
524  :param arg: an argument contained within this loop.
525  :type arg: :py:class:`psyclone.dynamo0p3.DynArgument`
526 
527  :returns: True if the argument reads, or might read from the \
528  halo and False otherwise.
529  :rtype: bool
530 
531  :raises GenerationError: if an unsupported upper loop bound name is \
532  provided for kernels with stencil access.
533  :raises InternalError: if an unsupported field access is found.
534  :raises InternalError: if an unsupported argument type is found.
535 
536  '''
537  const = LFRicConstants()
538  if arg.is_scalar or arg.is_operator:
539  # Scalars and operators do not have halos
540  return False
541  if arg.is_field:
542  # This is a field so might read from a halo
543  if arg.access in [AccessType.WRITE]:
544  # This is not a read access
545  return False
546  if arg.access in AccessType.all_read_accesses():
547  # This is a read access
548  if arg.descriptor.stencil:
549  if self._upper_bound_name_upper_bound_name not in ["cell_halo", "ncells"]:
550  raise GenerationError(
551  f"Loop bounds other than 'cell_halo' and 'ncells' "
552  f"are currently unsupported for kernels with "
553  f"stencil accesses. Found "
554  f"'{self._upper_bound_name}'.")
555  # An upper bound of 'cell_halo' means that the
556  # halo might be accessed irrespective of the
557  # stencil and a stencil read access with upper
558  # bound 'ncells' might read from the
559  # halo due to the stencil.
560  return True
561  # This is a non-stencil read access
562  if self._upper_bound_name_upper_bound_name in const.HALO_ACCESS_LOOP_BOUNDS:
563  # An upper bound that is part of the halo means
564  # that the halo might be accessed.
565  return True
566  if (not arg.discontinuous and
567  self.kernelkernelkernelkernel.iterates_over == "cell_column" and
568  self.kernelkernelkernelkernel.all_updates_are_writes and
569  self._upper_bound_name_upper_bound_name == "ncells"):
570  # This is the special case of a kernel that guarantees to
571  # write the same value to any given dof, irrespective of
572  # cell column.
573  return False
574  if not arg.discontinuous and \
575  self._upper_bound_name_upper_bound_name in ["ncells", "nannexed"]:
576  # Annexed dofs may be accessed. Return False if we
577  # always compute annexed dofs and True if we don't
578  # (as annexed dofs are part of the level 1 halo).
579  return not Config.get().api_conf("dynamo0.3").\
580  compute_annexed_dofs
581  # The halo is not accessed.
582  return False
583  raise InternalError(
584  f"Unexpected field access type '{arg.access}' found for arg "
585  f"'{arg.name}'.")
586  raise InternalError(
587  f"Expecting arg '{arg.name}' to be an operator, scalar or field, "
588  f"but found '{arg.argument_type}'.")
589 
590  def _add_field_component_halo_exchange(self, halo_field, idx=None):
591  '''An internal helper method to add the halo exchange call immediately
592  before this loop using the halo_field argument for the
593  associated field information and the optional idx argument if
594  the field is a vector field.
595 
596  In certain situations the halo exchange will not be
597  required. This is dealt with by adding the halo exchange,
598  asking it if it is required and then removing it if it is
599  not. This may seem strange but the logic for determining
600  whether a halo exchange is required is within the halo
601  exchange class so it is simplest to do it this way
602 
603  :param halo_field: the argument requiring a halo exchange
604  :type halo_field: :py:class:`psyclone.dynamo0p3.DynArgument`
605  :param index: optional argument providing the vector index if
606  there is one and None if not. Defaults to None.
607  :type index: int or None
608 
609  :raises InternalError: if there are two forward write \
610  dependencies and they are both associated with halo \
611  exchanges.
612 
613  '''
614  # Avoid circular import
615  # pylint: disable=import-outside-toplevel
616  from psyclone.dynamo0p3 import LFRicHaloExchange
617  exchange = LFRicHaloExchange(halo_field,
618  parent=self.parent,
619  vector_index=idx)
620  self.parent.children.insert(self.position,
621  exchange)
622 
623  # Is this halo exchange required? The halo exchange being
624  # added may replace an existing halo exchange, which would
625  # then be returned as a halo exchange dependence and an
626  # exception raised (as a halo exchange should not have another
627  # halo exchange as a dependence). Therefore, halo exchange
628  # dependencies are ignored here by setting the ignore_hex_dep
629  # optional argument.
630  required, _ = exchange.required(ignore_hex_dep=True)
631  if not required:
632  exchange.detach()
633  else:
634  # The halo exchange we have added may be replacing an
635  # existing one. If so, the one being replaced will be the
636  # first and only write dependence encountered and must be
637  # removed.
638  results = exchange.field.forward_write_dependencies()
639  if results:
640  first_dep_call = results[0].call
641  if isinstance(first_dep_call, HaloExchange):
642  # Sanity check. If the first dependence is a field
643  # accessed within a halo exchange then the
644  # subsequent one must not be.
645  next_results = results[0].forward_write_dependencies()
646  if next_results and any(tmp for tmp in next_results
647  if isinstance(tmp.call,
648  HaloExchange)):
649  raise InternalError(
650  f"When replacing a halo exchange with another one "
651  f"for field {exchange.field.name}, a subsequent "
652  f"dependent halo exchange was found. This should "
653  f"never happen.")
654  first_dep_call.detach()
655 
656  def _add_halo_exchange(self, halo_field):
657  '''Internal helper method to add (a) halo exchange call(s) immediately
658  before this loop using the halo_field argument for the
659  associated field information. If the field is a vector then
660  add the appropriate number of halo exchange calls.
661 
662  :param halo_field: the argument requiring a halo exchange
663  :type halo_field: :py:class:`psyclone.dynamo0p3.DynArgument`
664 
665  '''
666  if halo_field.vector_size > 1:
667  # the range function below returns values from
668  # 1 to the vector size which is what we
669  # require in our Fortran code
670  for idx in range(1, halo_field.vector_size+1):
671  self._add_field_component_halo_exchange_add_field_component_halo_exchange(halo_field, idx)
672  else:
673  self._add_field_component_halo_exchange_add_field_component_halo_exchange(halo_field)
674 
676  '''add and/or remove halo exchanges due to changes in the loops
677  bounds'''
678  # this call adds any new halo exchanges that are
679  # required. This is done by adding halo exchanges before this
680  # loop for any fields in the loop that require a halo exchange
681  # and don't already have one
682  self.create_halo_exchangescreate_halo_exchanges()
683  # Now remove any existing halo exchanges that are no longer
684  # required. This is done by removing halo exchanges after this
685  # loop where a field in this loop previously had a forward
686  # dependence on a halo exchange but no longer does
687  # pylint: disable=too-many-nested-blocks
688  # Avoid circular import
689  # pylint: disable=import-outside-toplevel
690  from psyclone.dynamo0p3 import LFRicHaloExchange
691  for call in self.kernels():
692  for arg in call.arguments.args:
693  if arg.access in AccessType.all_write_accesses():
694  dep_arg_list = arg.forward_read_dependencies()
695  for dep_arg in dep_arg_list:
696  if isinstance(dep_arg.call, LFRicHaloExchange):
697  # found a halo exchange as a forward dependence
698  # ask the halo exchange if it is required
699  halo_exchange = dep_arg.call
700  required, _ = halo_exchange.required()
701  if not required:
702  halo_exchange.detach()
703 
705  '''Add halo exchanges before this loop as required by fields within
706  this loop. To keep the logic simple we assume that any field
707  that accesses the halo will require a halo exchange and then
708  remove the halo exchange if this is not the case (when
709  previous writers perform sufficient redundant computation). It
710  is implemented this way as the halo exchange class determines
711  whether it is required or not so a halo exchange needs to
712  exist in order to find out. The appropriate logic is coded in
713  the _add_halo_exchange helper method. In some cases a new halo
714  exchange will replace an existing one. In this situation that
715  routine also removes the old one.
716 
717  '''
718  for halo_field in self.unique_fields_with_halo_readsunique_fields_with_halo_reads():
719  # for each unique field in this loop that has its halo
720  # read (including annexed dofs), find the previous update
721  # of this field
722  prev_arg_list = halo_field.backward_write_dependencies()
723  if not prev_arg_list:
724  # field has no previous dependence so create new halo
725  # exchange(s) as we don't know the state of the fields
726  # halo on entry to the invoke
727  self._add_halo_exchange_add_halo_exchange(halo_field)
728  else:
729  # field has one or more previous dependencies
730  if len(prev_arg_list) > 1:
731  # field has more than one previous dependencies so
732  # should be a vector
733  if halo_field.vector_size <= 1:
734  raise GenerationError(
735  f"Error in create_halo_exchanges. Expecting field "
736  f"'{halo_field.name}' to be a vector as it has "
737  f"multiple previous dependencies")
738  if len(prev_arg_list) != halo_field.vector_size:
739  raise GenerationError(
740  f"Error in create_halo_exchanges. Expecting a "
741  f"dependence for each vector index for field "
742  f"'{halo_field.name}' but the number of "
743  f"dependencies is '{halo_field.vector_size}' and "
744  f"the vector size is '{len(prev_arg_list)}'.")
745  for arg in prev_arg_list:
746  # Avoid circular import
747  # pylint: disable=import-outside-toplevel
748  from psyclone.dynamo0p3 import LFRicHaloExchange
749  if not isinstance(arg.call, LFRicHaloExchange):
750  raise GenerationError(
751  "Error in create_halo_exchanges. Expecting "
752  "all dependent nodes to be halo exchanges")
753  prev_node = prev_arg_list[0].call
754  # Avoid circular import
755  # pylint: disable=import-outside-toplevel
756  from psyclone.dynamo0p3 import LFRicHaloExchange
757  if not isinstance(prev_node, LFRicHaloExchange):
758  # previous dependence is not a halo exchange so
759  # call the add halo exchange logic which
760  # determines whether a halo exchange is required
761  # or not
762  self._add_halo_exchange_add_halo_exchange(halo_field)
763 
764  @property
765  def start_expr(self):
766  '''
767  :returns: the PSyIR for the lower bound of this loop.
768  :rtype: :py:class:`psyclone.psyir.Node`
769 
770  '''
771  inv_sched = self.ancestor(Routine)
772  sym_table = inv_sched.symbol_table
773  loops = inv_sched.loops()
774  posn = None
775  for index, loop in enumerate(loops):
776  if loop is self:
777  posn = index
778  break
779  root_name = f"loop{posn}_start"
780  lbound = sym_table.find_or_create_integer_symbol(root_name,
781  tag=root_name)
782  self.children[0] = Reference(lbound)
783  return self.children[0]
784 
785  @property
786  def stop_expr(self):
787  '''
788  :returns: the PSyIR for the upper bound of this loop.
789  :rtype: :py:class:`psyclone.psyir.Node`
790 
791  '''
792  inv_sched = self.ancestor(Routine)
793  sym_table = inv_sched.symbol_table
794 
795  if self._loop_type_loop_type_loop_type == "colour":
796  # If this loop is over all cells of a given colour then we must
797  # lookup the loop bound as it depends on the current colour.
798  parent_loop = self.ancestor(Loop)
799  colour_var = parent_loop.variable
800 
801  asym = self.kernelkernelkernelkernel.last_cell_all_colours_symbol
802  const = LFRicConstants()
803 
804  if self.upper_bound_nameupper_bound_name in const.HALO_ACCESS_LOOP_BOUNDS:
805  if self._upper_bound_halo_depth_upper_bound_halo_depth:
806  # TODO: #696 Add kind (precision) once the
807  # LFRicInvokeSchedule constructor has been extended to
808  # create the necessary symbols.
809  halo_depth = Literal(str(self._upper_bound_halo_depth_upper_bound_halo_depth),
810  INTEGER_TYPE)
811  else:
812  # We need to go to the full depth of the halo.
813  root_name = "mesh"
814  if self.kernels()[0].is_intergrid:
815  root_name += f"_{self._field_name}"
816  depth_sym = sym_table.lookup_with_tag(
817  f"max_halo_depth_{root_name}")
818  halo_depth = Reference(depth_sym)
819 
820  return ArrayReference.create(asym, [Reference(colour_var),
821  halo_depth])
822  return ArrayReference.create(asym, [Reference(colour_var)])
823 
824  # This isn't a 'colour' loop so we have already set-up a
825  # variable that holds the upper bound.
826  loops = inv_sched.loops()
827  posn = None
828  for index, loop in enumerate(loops):
829  if loop is self:
830  posn = index
831  break
832  root_name = f"loop{posn}_stop"
833  ubound = sym_table.find_or_create_integer_symbol(root_name,
834  tag=root_name)
835  self.children[1] = Reference(ubound)
836  return self.children[1]
837 
838  def gen_code(self, parent):
839  ''' Call the base class to generate the code and then add any
840  required halo exchanges.
841 
842  :param parent: an f2pygen object that will be the parent of \
843  f2pygen objects created in this method.
844  :type parent: :py:class:`psyclone.f2pygen.BaseGen`
845 
846 
847  '''
848  # pylint: disable=too-many-statements, too-many-branches
849  # Check that we're not within an OpenMP parallel region if
850  # we are a loop over colours.
851  if self._loop_type_loop_type_loop_type == "colours" and self.is_openmp_parallel():
852  raise GenerationError("Cannot have a loop over colours within an "
853  "OpenMP parallel region.")
854 
855  super().gen_code(parent)
856  # TODO #1010: gen_code of this loop calls the PSyIR lowering version,
857  # but that method can not currently provide sibiling nodes because the
858  # ancestor is not PSyIR, so for now we leave the remainder of the
859  # gen_code logic here instead of removing the whole method.
860 
861  if not (Config.get().distributed_memory and
862  self._loop_type_loop_type_loop_type != "colour"):
863  # No need to add halo exchanges so we are done.
864  return
865 
866  # Set halo clean/dirty for all fields that are modified
867  if not self.unique_modified_argsunique_modified_args("gh_field"):
868  return
869 
870  if self.ancestor((ACCRegionDirective, OMPRegionDirective)):
871  # We cannot include calls to set halos dirty/clean within OpenACC
872  # or OpenMP regions. This is handled by the appropriate Directive
873  # class instead.
874  # TODO #1755 can this check be made more general (e.g. to include
875  # Extraction regions)?
876  return
877 
878  parent.add(CommentGen(parent, ""))
879  if self._loop_type_loop_type_loop_type != "null":
880  prev_node_name = "loop"
881  else:
882  prev_node_name = "kernel"
883  parent.add(CommentGen(parent, f" Set halos dirty/clean for fields "
884  f"modified in the above {prev_node_name}"))
885  parent.add(CommentGen(parent, ""))
886 
887  self.gen_mark_halos_clean_dirtygen_mark_halos_clean_dirtygen_mark_halos_clean_dirty(parent)
888 
889  parent.add(CommentGen(parent, ""))
890 
891  def gen_mark_halos_clean_dirty(self, parent):
892  '''
893  Generates the necessary code to mark halo regions for all modified
894  fields as clean or dirty following execution of this loop.
895 
896  :param parent: the node in the f2pygen AST to which to add content.
897  :type parent: :py:class:`psyclone.f2pygen.BaseGen`
898 
899  '''
900  # Set halo clean/dirty for all fields that are modified
901  fields = self.unique_modified_argsunique_modified_args("gh_field")
902 
903  sym_table = self.ancestor(InvokeSchedule).symbol_table
904 
905  # First set all of the halo dirty unless we are
906  # subsequently going to set all of the halo clean
907  for field in fields:
908  # Avoid circular import
909  # pylint: disable=import-outside-toplevel
910  from psyclone.dynamo0p3 import HaloWriteAccess
911  # The HaloWriteAccess class provides information about how the
912  # supplied field is accessed within its parent loop
913  hwa = HaloWriteAccess(field, sym_table)
914  if not hwa.max_depth or hwa.dirty_outer:
915  # output set dirty as some of the halo will not be set to clean
916  if field.vector_size > 1:
917  # the range function below returns values from 1 to the
918  # vector size which is what we require in our Fortran code
919  for index in range(1, field.vector_size+1):
920  parent.add(CallGen(parent, name=field.proxy_name +
921  f"({index})%set_dirty()"))
922  else:
923  parent.add(CallGen(parent, name=field.proxy_name +
924  "%set_dirty()"))
925  # Now set appropriate parts of the halo clean where
926  # redundant computation has been performed.
927  if hwa.literal_depth:
928  # halo access(es) is/are to a fixed depth
929  halo_depth = hwa.literal_depth
930  if hwa.dirty_outer:
931  halo_depth -= 1
932  if halo_depth > 0:
933  if field.vector_size > 1:
934  # The range function below returns values from 1 to the
935  # vector size, as required in our Fortran code.
936  for index in range(1, field.vector_size+1):
937  parent.add(CallGen(
938  parent, name=f"{field.proxy_name}({index})%"
939  f"set_clean({halo_depth})"))
940  else:
941  parent.add(CallGen(
942  parent, name=f"{field.proxy_name}%set_clean("
943  f"{halo_depth})"))
944  elif hwa.max_depth:
945  # halo accesses(s) is/are to the full halo
946  # depth (-1 if continuous)
947  halo_depth = sym_table.lookup_with_tag(
948  "max_halo_depth_mesh").name
949 
950  if hwa.dirty_outer:
951  # a continuous field iterating over cells leaves the
952  # outermost halo dirty
953  halo_depth += "-1"
954  if field.vector_size > 1:
955  # the range function below returns values from 1 to the
956  # vector size which is what we require in our Fortran code
957  for index in range(1, field.vector_size+1):
958  call = CallGen(parent,
959  name=f"{field.proxy_name}({index})%"
960  f"set_clean({halo_depth})")
961  parent.add(call)
962  else:
963  call = CallGen(parent, name=f"{field.proxy_name}%"
964  f"set_clean({halo_depth})")
965  parent.add(call)
966 
968  test_all_variables=False,
969  signatures_to_ignore=None,
970  dep_tools=None):
971  '''
972  This function is an LFRic-specific override of the default method
973  in the Loop class. It allows domain-specific rules to be applied when
974  determining whether or not loop iterations are independent.
975 
976  :param bool test_all_variables: if True, it will test if all variable
977  accesses are independent, otherwise it will stop after the first
978  variable access is found that isn't.
979  :param signatures_to_ignore: list of signatures for which to skip
980  the access checks.
981  :type signatures_to_ignore: Optional[
982  List[:py:class:`psyclone.core.Signature`]]
983  :param dep_tools: an optional instance of DependencyTools so that the
984  caller can access any diagnostic messages detailing why the loop
985  iterations are not independent.
986  :type dep_tools: Optional[
987  :py:class:`psyclone.psyir.tools.DependencyTools`]
988 
989  :returns: True if the loop iterations are independent, False otherwise.
990  :rtype: bool
991 
992  '''
993  # pylint: disable=import-outside-toplevel
994  from psyclone.psyir.tools import DependencyTools, DTCode
995  if not dep_tools:
996  dtools = DependencyTools()
997  else:
998  dtools = dep_tools
999 
1000  if self.loop_typeloop_typeloop_typeloop_typeloop_type in ["null", "colours"]:
1001  # We know we can't parallelise these loops. ("null" means there
1002  # is no actual loop and "colours" is the *outer* loop over the
1003  # different colours used - it is the inner, "colour" loop over
1004  # cells of a single colour which can be parallelised.)
1005  return False
1006 
1007  try:
1008  stat = dtools.can_loop_be_parallelised(
1009  self, test_all_variables=test_all_variables,
1010  signatures_to_ignore=signatures_to_ignore)
1011  if stat:
1012  return True
1013  except (InternalError, KeyError):
1014  # LFRic still has symbols that don't exist in the symbol_table
1015  # until the gen_code() step, so the dependency analysis raises
1016  # errors in some cases.
1017  # TODO #1648 - when a transformation colours a loop we must
1018  # ensure "last_[halo]_cell_all_colours" is added to the symbol
1019  # table.
1020  return True
1021 
1022  # The generic DA says that this loop cannot be parallelised. However,
1023  # we use domain-specific information to qualify this.
1024  if self.loop_typeloop_typeloop_typeloop_typeloop_type == "colour":
1025  # This loop is either over cells of a single colour.
1026  # According to LFRic rules this is safe to parallelise.
1027  return True
1028 
1029  if self.loop_typeloop_typeloop_typeloop_typeloop_type == "dof":
1030  # The generic DA can't see the PSyIR of this Builtin (because it
1031  # hasn't been lowered to language level) so we use
1032  # domain-specific knowledge about its properties.
1033  if self.kernelkernelkernelkernel.is_reduction:
1034  dtools._add_message(
1035  f"Builtin '{self.kernel.name}' performs a reduction",
1036  DTCode.WARN_SCALAR_REDUCTION)
1037  return False
1038  return True
1039 
1040  if self.loop_typeloop_typeloop_typeloop_typeloop_type == "":
1041  # We can parallelise a non-coloured loop if it only updates
1042  # quantities on discontinuous function spaces. If an LFRic kernel
1043  # updates quantities on a continuous function space then it must
1044  # have at least one argument with GH_INC access. Therefore, we
1045  # can simply check whether or not it has such an argument in order
1046  # to infer the continuity of the space.
1047  if self.has_inc_arghas_inc_arg():
1048  dtools._add_message(
1049  f"Kernel '{self.kernel.name}' performs an INC update",
1050  DTCode.ERROR_WRITE_WRITE_RACE)
1051  return False
1052  return True
1053 
1054  raise InternalError(f"independent_iterations: loop of type "
1055  f"'{self.loop_type}' is not supported.")
1056 
1057 
1058 # ---------- Documentation utils -------------------------------------------- #
1059 # The list of module members that we wish AutoAPI to generate
1060 # documentation for. (See https://psyclone-ref.readthedocs.io)
1061 __all__ = ['LFRicLoop']
def field_space(self, my_field_space)
Definition: psyloop.py:179
def _add_halo_exchange(self, halo_field)
Definition: lfric_loop.py:656
def set_upper_bound(self, name, index=None)
Definition: lfric_loop.py:293
def _add_field_component_halo_exchange(self, halo_field, idx=None)
Definition: lfric_loop.py:590
def set_lower_bound(self, name, index=None)
Definition: lfric_loop.py:280
def independent_iterations(self, test_all_variables=False, signatures_to_ignore=None, dep_tools=None)
Definition: lfric_loop.py:970