Reference Guide  2.5.0
module_info.py
1 # -----------------------------------------------------------------------------
2 # BSD 3-Clause License
3 #
4 # Copyright (c) 2023-2024, Science and Technology Facilities Council.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are met:
9 #
10 # * Redistributions of source code must retain the above copyright notice, this
11 # list of conditions and the following disclaimer.
12 #
13 # * Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
16 #
17 # * Neither the name of the copyright holder nor the names of its
18 # contributors may be used to endorse or promote products derived from
19 # this software without specific prior written permission.
20 #
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 # POSSIBILITY OF SUCH DAMAGE.
33 # -----------------------------------------------------------------------------
34 # Author J. Henrichs, Bureau of Meteorology
35 
36 '''This module contains the ModuleInfo class, which is used to store
37 and cache information about a module: the filename, source code (if requested)
38 and the fparser tree (if requested), and information about any routines it
39 includes, and external symbol usage.
40 '''
41 
42 import os
43 
44 from fparser.common.readfortran import FortranStringReader
45 from fparser.two.Fortran2003 import (Function_Subprogram, Interface_Block,
46  Interface_Stmt, Procedure_Stmt,
47  Subroutine_Subprogram, Use_Stmt)
48 from fparser.two.parser import ParserFactory
49 from fparser.two.utils import FortranSyntaxError, walk
50 
51 from psyclone.errors import InternalError, PSycloneError
52 from psyclone.psyir.frontend.fparser2 import Fparser2Reader
53 from psyclone.psyir.nodes import Container, FileContainer
54 from psyclone.psyir.symbols import SymbolError
55 
56 
57 # ============================================================================
class ModuleInfoError(PSycloneError):
    '''
    PSyclone-specific exception for use when an error with the module manager
    happens - typically indicating that some module information cannot be
    found.

    :param str value: the message associated with the error.

    '''
    def __init__(self, value):
        PSycloneError.__init__(self, value)
        # Overwrite the message stored by the base class so that it
        # carries a prefix identifying where the error originated.
        self.value = "ModuleInfo error: "+str(value)
70 
71 
72 # ============================================================================
class ModuleInfo:
    # pylint: disable=too-many-instance-attributes
    '''This class stores mostly cached information about a module: it stores
    the original filename, if requested it will read the file and then cache
    the plain text, and if required it will parse the file and then cache
    the fparser parse tree and the PSyIR created from it.

    :param str name: the module name.
    :param str filename: the name of the source file that stores this
        module (including path).

    '''

    def __init__(self, name, filename):
        self._name = name
        self._filename = filename

        # A cache for the source code:
        self._source_code = None

        # A cache for the fparser tree
        self._parse_tree = None

        # A cache for the PSyIR representation
        self._psyir = None

        # A cache for the module dependencies: this is just a set
        # of all modules used by this module. Type: set[str]
        self._used_modules = None

        # This is a dictionary containing the sets of symbols imported from
        # each module, indexed by the module names: dict[str, set[str]].
        self._used_symbols_from_module = None

        # This variable will be a set that stores the (lower-case) names of
        # all routines (based on fparser), so we can test if a routine is
        # defined without having to convert the parse tree to PSyIR. It is
        # initialised with None so we avoid trying to parse a file more than
        # once (parsing errors would cause _routine_names to be empty, so we
        # can distinguish 'not yet parsed' (None) from 'no routines' (empty)).
        # TODO #2435: To be changed once we have support for interfaces
        self._routine_names = None

        # This map contains the list of routine names that are part
        # of the same generic interface, indexed by the (lower-case)
        # interface name.
        # TODO #2435: To be changed once we have support for interfaces
        self._generic_interfaces = {}

        self._processor = Fparser2Reader()

    # ------------------------------------------------------------------------
    @property
    def name(self):
        ''':returns: the name of this module.
        :rtype: str

        '''
        return self._name

    # ------------------------------------------------------------------------
    @property
    def filename(self):
        ''':returns: the filename that contains the source code for this
            module.
        :rtype: str

        '''
        return self._filename

    # ------------------------------------------------------------------------
    def get_source_code(self):
        '''Returns the source code for the module. The first time, it
        will be read from the file, but the data is then cached.

        :returns: the source code.
        :rtype: str

        :raises ModuleInfoError: when the file cannot be found.

        '''
        if self._source_code is None:
            try:
                with open(self._filename, "r", encoding='utf-8') as file_in:
                    self._source_code = file_in.read()
            except FileNotFoundError as err:
                raise ModuleInfoError(
                    f"Could not find file '{self._filename}' when trying to "
                    f"read source code for module '{self._name}'") from err

        return self._source_code

    # ------------------------------------------------------------------------
    def get_parse_tree(self):
        '''Returns the fparser parse tree for this module. The first time,
        the file will be parsed by fparser using the Fortran 2008 standard.
        The tree is then cached for any future uses. As a side effect, the
        names of all routines and generic interfaces found in the tree are
        recorded (in lower case, since Fortran is case insensitive and
        look-ups in this class use lower-case names).

        Errors raised while reading or parsing the file are not handled
        here: the callers in this class catch `FortranSyntaxError`.

        :returns: the fparser parse tree for this module.
        :rtype: :py:class:`fparser.two.Fortran2003.Program`

        :raises ModuleInfoError: when the source file cannot be found.

        '''
        if self._parse_tree is None:
            # Set routine_names to be an empty set (it was None before).
            # This way we avoid that any other function might trigger to
            # parse this file again (in case of parsing errors).
            self._routine_names = set()

            reader = FortranStringReader(self.get_source_code())
            parser = ParserFactory().create(std="f2008")
            self._parse_tree = parser(reader)

            # First collect information about all subroutines/functions.
            # Store information about generic interface to be handled later
            # (so we only walk the tree once):
            # TODO #2478: once generic interfaces are supported, use PSyIR
            # instead of fparser here.
            all_generic_interfaces = []
            for routine in walk(self._parse_tree, (Function_Subprogram,
                                                   Subroutine_Subprogram,
                                                   Interface_Block)):
                if isinstance(routine, Interface_Block):
                    all_generic_interfaces.append(routine)
                else:
                    # Store the name in lower case so that it matches the
                    # lower-cased look-up done in contains_routine().
                    routine_name = str(routine.content[0].items[1]).lower()
                    self._routine_names.add(routine_name)

            # Then handle all generic interfaces and add them to
            # _generic_interfaces:
            for interface in all_generic_interfaces:
                # TODO #2422 This code does not support all potential
                # interface statements. After #2422 we can use PSyIR here.
                # Get the name of the interface from the Interface_Stmt:
                name = str(walk(interface, Interface_Stmt)[0].items[0]).lower()
                self._routine_names.add(name)

                # Collect all specific functions for this generic interface
                routine_names = []
                for proc_stmt in walk(interface, Procedure_Stmt):
                    # Convert the items to strings:
                    routine_names.extend([str(i) for i in
                                          proc_stmt.items[0].items])
                self._generic_interfaces[name] = routine_names

        return self._parse_tree

    # ------------------------------------------------------------------------
    def contains_routine(self, routine_name):
        '''Checks (case insensitively) whether a routine is defined in
        this module. If required, this triggers parsing of the file.

        :param str routine_name: the name of the routine to look for.

        :returns: whether the specified routine name is part of this
            module or not. It will also return False if the file could
            not be parsed.
        :rtype: bool

        '''
        # TODO #2422 and TODO #2478: Once we parse everything to PSyIR (esp.
        # generic interfaces), this routine can just be replaced with
        # get_psyir().get_routine_psyir(routine_name)
        if self._routine_names is None:
            # This will trigger adding routine information
            try:
                self.get_parse_tree()
            except FortranSyntaxError:
                return False

        return routine_name.lower() in self._routine_names

    # ------------------------------------------------------------------------
    def _extract_import_information(self):
        '''This internal function analyses a given module source file and
        caches which modules are imported (in self._used_modules), and which
        symbol is imported from each of these modules (in
        self._used_symbols_from_module). If the file cannot be parsed, a
        message is printed and both caches are left empty.

        '''
        # Initialise the caches:
        self._used_modules = set()
        self._used_symbols_from_module = {}

        try:
            parse_tree = self.get_parse_tree()
        except FortranSyntaxError:
            # TODO #11: Add proper logging
            # TODO #2120: Handle error
            print(f"[ModuleInfo._extract_import_information] Syntax error "
                  f"parsing '{self._filename}' - ignored")
            # Hide syntax errors
            return
        for use in walk(parse_tree, Use_Stmt):
            # Ignore intrinsic modules:
            if str(use.items[0]) == "INTRINSIC":
                continue

            mod_name = str(use.items[2])
            self._used_modules.add(mod_name)
            all_symbols = set()

            only_list = use.items[4]
            # If there is no only_list, then the set of symbols
            # will stay empty
            if only_list:
                # Parse the only list:
                for symbol in only_list.children:
                    all_symbols.add(str(symbol))

            self._used_symbols_from_module[mod_name] = all_symbols

    # ------------------------------------------------------------------------
    def get_used_modules(self):
        '''This function returns a set of all modules `used` in this
        module. Fortran `intrinsic` modules will be ignored. The information
        is based on the fparser parse tree of the module (since fparser can
        handle more files than PSyir, like LFRic's `constants_mod` which has
        pre-processor directives).

        :returns: a set with all imported module names.
        :rtype: set[str]

        '''
        if self._used_modules is None:
            self._extract_import_information()

        return self._used_modules

    # ------------------------------------------------------------------------
    def get_used_symbols_from_modules(self):
        '''This function returns information about which modules are used by
        this module, and also which symbols are imported. The return value is
        a dictionary with the used module name as key, and a set of all
        imported symbol names as value. The set is empty if the `use`
        statement has no only-list.

        :returns: a dictionary that gives for each module name the set
            of symbols imported from it.
        :rtype: dict[str, set[str]]

        '''
        if self._used_symbols_from_module is None:
            self._extract_import_information()

        return self._used_symbols_from_module

    # ------------------------------------------------------------------------
    def get_psyir(self):
        '''Returns the PSyIR representation of this module. This is based
        on the fparser tree (see get_parse_tree), and the information is
        cached. If the PSyIR must be modified, it needs to be copied,
        otherwise the modified tree will be returned from the cache in the
        future.
        If the conversion to PSyIR fails, a dummy FileContainer with an
        empty Container (module) is created, and that dummy module is
        returned, which avoids additional error handling in many other
        subroutines.
        #TODO 2120: This should be revisited when improving on the error
        handling.

        :returns: the first child of the PSyIR FileContainer created for
            this file, i.e. the PSyIR representing this module.
        :rtype: :py:class:`psyclone.psyir.nodes.Node`

        '''
        if self._psyir is None:
            try:
                self._psyir = \
                    self._processor.generate_psyir(self.get_parse_tree())
            except (KeyError, SymbolError, InternalError,
                    FortranSyntaxError) as err:
                print(f"Error trying to parse '{self.filename}': '{err}'")
                # TODO #11: Add proper logging
                # TODO #2120: Handle error better. Long term we should not
                # just ignore errors.
                # Create a dummy FileContainer with a dummy module. This avoids
                # additional error handling in other subroutines, since they
                # will all return 'no information', whatever you ask for
                self._psyir = FileContainer(os.path.basename(self._filename))
                module = Container("invalid-module")
                self._psyir.children.append(module)

        # TODO #2462: needs to be fixed to properly support multiple modules
        # in one file
        # Return the actual module Container (not the FileContainer)
        return self._psyir.children[0]

    # ------------------------------------------------------------------------
    def resolve_routine(self, routine_name):
        '''This function returns a list of function names that might be
        actually called when the routine `routine_name` is called. In most
        cases this is exactly that name (in lower case), but in case of a
        generic interface the name might change. For now (since we cannot
        resolve generic interfaces yet), we return the list of all possible
        functions that might be called.

        :param str routine_name: the name of the routine to resolve

        :returns: list of routine name(s) that could be called.
        :rtype: list[str]

        '''
        # TODO #2422: once #2422 is done, this can be moved into the PSyIR
        if self._psyir is None:
            # Creating the PSyIR parses the file, which in turn collects
            # the generic interface information used below.
            self.get_psyir()
        routine_name = routine_name.lower()
        if routine_name not in self._generic_interfaces:
            return [routine_name]

        # If a generic interface name is queried, return a copy
        # of all possible routine names that might be called:
        return self._generic_interfaces[routine_name][:]
def resolve_routine(self, routine_name)
Definition: module_info.py:354
def contains_routine(self, routine_name)
Definition: module_info.py:218