"""Check Python modules and C API for coverage. Mostly written by Josip Dzolonga for the Google Highly Open Participation contest. """ from __future__ import annotations import glob import inspect import pickle import pkgutil import re import sys from importlib import import_module from os import path from typing import IO, TYPE_CHECKING, Any, TextIO import sphinx from sphinx.builders import Builder from sphinx.locale import __ from sphinx.util import logging from sphinx.util.console import red from sphinx.util.inspect import safe_getattr if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Sequence, Set from sphinx.application import Sphinx from sphinx.util.typing import ExtensionMetadata logger = logging.getLogger(__name__) # utility def write_header(f: IO[str], text: str, char: str = '-') -> None: f.write(text + '\n') f.write(char * len(text) + '\n\n') def compile_regex_list(name: str, exps: str) -> list[re.Pattern[str]]: lst = [] for exp in exps: try: lst.append(re.compile(exp)) except Exception: logger.warning(__('invalid regex %r in %s'), exp, name) return lst def _write_table(table: list[list[str]]) -> Iterator[str]: sizes = [max(len(x[column]) for x in table) + 1 for column in range(len(table[0]))] yield _add_line(sizes, '-') yield from _add_row(sizes, table[0], '=') for row in table[1:]: yield from _add_row(sizes, row, '-') def _add_line(sizes: list[int], separator: str) -> str: return '+' + ''.join((separator * (size + 1)) + '+' for size in sizes) def _add_row(col_widths: list[int], columns: list[str], separator: str) -> Iterator[str]: yield ''.join(f'| {column: <{col_widths[i]}}' for i, column in enumerate(columns)) + '|' yield _add_line(col_widths, separator) def _load_modules(mod_name: str, ignored_module_exps: Iterable[re.Pattern[str]]) -> Set[str]: """Recursively load all submodules. :param mod_name: The name of a module to load submodules for. :param ignored_module_exps: A list of regexes for modules to ignore. :returns: A set of modules names including the provided module name, ``mod_name`` :raises ImportError: If the module indicated by ``mod_name`` could not be loaded. """ if any(exp.match(mod_name) for exp in ignored_module_exps): return set() # This can raise an exception, which must be handled by the caller. mod = import_module(mod_name) modules = {mod_name} if mod.__spec__ is None: return modules search_locations = mod.__spec__.submodule_search_locations for (_, sub_mod_name, sub_mod_ispkg) in pkgutil.iter_modules(search_locations): if sub_mod_name == '__main__': continue if sub_mod_ispkg: modules |= _load_modules(f'{mod_name}.{sub_mod_name}', ignored_module_exps) else: if any(exp.match(sub_mod_name) for exp in ignored_module_exps): continue modules.add(f'{mod_name}.{sub_mod_name}') return modules def _determine_py_coverage_modules( coverage_modules: Sequence[str], seen_modules: Set[str], ignored_module_exps: Iterable[re.Pattern[str]], py_undoc: dict[str, dict[str, Any]], ) -> list[str]: """Return a sorted list of modules to check for coverage. Figure out which of the two operating modes to use: - If 'coverage_modules' is not specified, we check coverage for all modules seen in the documentation tree. Any objects found in these modules that are not documented will be noted. This will therefore only identify missing objects, but it requires no additional configuration. - If 'coverage_modules' is specified, we check coverage for all modules specified in this configuration value. Any objects found in these modules that are not documented will be noted. In addition, any objects from other modules that are documented will be noted. This will therefore identify both missing modules and missing objects, but it requires manual configuration. """ if not coverage_modules: return sorted(seen_modules) modules: set[str] = set() for mod_name in coverage_modules: try: modules |= _load_modules(mod_name, ignored_module_exps) except ImportError as err: # TODO(stephenfin): Define a subtype for all logs in this module logger.warning(__('module %s could not be imported: %s'), mod_name, err) py_undoc[mod_name] = {'error': err} continue # if there are additional modules then we warn but continue scanning if additional_modules := seen_modules - modules: logger.warning( __('the following modules are documented but were not specified ' 'in coverage_modules: %s'), ', '.join(additional_modules), ) # likewise, if there are missing modules we warn but continue scanning if missing_modules := modules - seen_modules: logger.warning( __('the following modules are specified in coverage_modules ' 'but were not documented'), ', '.join(missing_modules), ) return sorted(modules) class CoverageBuilder(Builder): """ Evaluates coverage of code in the documentation. """ name = 'coverage' epilog = __('Testing of coverage in the sources finished, look at the ' 'results in %(outdir)s' + path.sep + 'python.txt.') def init(self) -> None: self.c_sourcefiles: list[str] = [] for pattern in self.config.coverage_c_path: pattern = path.join(self.srcdir, pattern) self.c_sourcefiles.extend(glob.glob(pattern)) self.c_regexes: list[tuple[str, re.Pattern[str]]] = [] for (name, exp) in self.config.coverage_c_regexes.items(): try: self.c_regexes.append((name, re.compile(exp))) except Exception: logger.warning(__('invalid regex %r in coverage_c_regexes'), exp) self.c_ignorexps: dict[str, list[re.Pattern[str]]] = {} for (name, exps) in self.config.coverage_ignore_c_items.items(): self.c_ignorexps[name] = compile_regex_list('coverage_ignore_c_items', exps) self.mod_ignorexps = compile_regex_list('coverage_ignore_modules', self.config.coverage_ignore_modules) self.cls_ignorexps = compile_regex_list('coverage_ignore_classes', self.config.coverage_ignore_classes) self.fun_ignorexps = compile_regex_list('coverage_ignore_functions', self.config.coverage_ignore_functions) self.py_ignorexps = compile_regex_list('coverage_ignore_pyobjects', self.config.coverage_ignore_pyobjects) def get_outdated_docs(self) -> str: return 'coverage overview' def write_documents(self, _docnames: Set[str]) -> None: self.py_undoc: dict[str, dict[str, Any]] = {} self.py_undocumented: dict[str, Set[str]] = {} self.py_documented: dict[str, Set[str]] = {} self.build_py_coverage() self.write_py_coverage() self.c_undoc: dict[str, Set[tuple[str, str]]] = {} self.build_c_coverage() self.write_c_coverage() def build_c_coverage(self) -> None: c_objects = {} for obj in self.env.domains.c_domain.get_objects(): c_objects[obj[2]] = obj[1] for filename in self.c_sourcefiles: undoc: set[tuple[str, str]] = set() with open(filename, encoding="utf-8") as f: for line in f: for key, regex in self.c_regexes: match = regex.match(line) if match: name = match.groups()[0] if key not in c_objects: undoc.add((key, name)) continue if name not in c_objects[key]: for exp in self.c_ignorexps.get(key, []): if exp.match(name): break else: undoc.add((key, name)) continue if undoc: self.c_undoc[filename] = undoc def write_c_coverage(self) -> None: output_file = path.join(self.outdir, 'c.txt') with open(output_file, 'w', encoding="utf-8") as op: if self.config.coverage_write_headline: write_header(op, 'Undocumented C API elements', '=') op.write('\n') for filename, undoc in self.c_undoc.items(): write_header(op, filename) for typ, name in sorted(undoc): op.write(' * %-50s [%9s]\n' % (name, typ)) if self.config.coverage_show_missing_items: if self.app.quiet: logger.warning(__('undocumented c api: %s [%s] in file %s'), name, typ, filename) else: logger.info(red('undocumented ') + 'c ' + 'api ' + '%-30s' % (name + " [%9s]" % typ) + red(' - in file ') + filename) op.write('\n') def ignore_pyobj(self, full_name: str) -> bool: return any( exp.search(full_name) for exp in self.py_ignorexps ) def build_py_coverage(self) -> None: seen_objects = frozenset(self.env.domaindata['py']['objects']) seen_modules = frozenset(self.env.domaindata['py']['modules']) skip_undoc = self.config.coverage_skip_undoc_in_source modules = _determine_py_coverage_modules( self.config.coverage_modules, seen_modules, self.mod_ignorexps, self.py_undoc, ) for mod_name in modules: ignore = False for exp in self.mod_ignorexps: if exp.match(mod_name): ignore = True break if ignore or self.ignore_pyobj(mod_name): continue try: mod = import_module(mod_name) except ImportError as err: logger.warning(__('module %s could not be imported: %s'), mod_name, err) self.py_undoc[mod_name] = {'error': err} continue documented_objects: set[str] = set() undocumented_objects: set[str] = set() funcs = [] classes: dict[str, list[str]] = {} for name, obj in inspect.getmembers(mod): # diverse module attributes are ignored: if name[0] == '_': # begins in an underscore continue if not hasattr(obj, '__module__'): # cannot be attributed to a module continue if obj.__module__ != mod_name: # is not defined in this module continue full_name = f'{mod_name}.{name}' if self.ignore_pyobj(full_name): continue if inspect.isfunction(obj): if full_name not in seen_objects: for exp in self.fun_ignorexps: if exp.match(name): break else: if skip_undoc and not obj.__doc__: continue funcs.append(name) undocumented_objects.add(full_name) else: documented_objects.add(full_name) elif inspect.isclass(obj): for exp in self.cls_ignorexps: if exp.match(name): break else: if full_name not in seen_objects: if skip_undoc and not obj.__doc__: continue # not documented at all classes[name] = [] continue attrs: list[str] = [] for attr_name in dir(obj): if attr_name not in obj.__dict__: continue try: attr = safe_getattr(obj, attr_name) except AttributeError: continue if not (inspect.ismethod(attr) or inspect.isfunction(attr)): continue if attr_name[0] == '_': # starts with an underscore, ignore it continue if skip_undoc and not attr.__doc__: # skip methods without docstring if wished continue full_attr_name = f'{full_name}.{attr_name}' if self.ignore_pyobj(full_attr_name): continue if full_attr_name not in seen_objects: attrs.append(attr_name) undocumented_objects.add(full_attr_name) else: documented_objects.add(full_attr_name) if attrs: # some attributes are undocumented classes[name] = attrs self.py_undoc[mod_name] = {'funcs': funcs, 'classes': classes} self.py_undocumented[mod_name] = undocumented_objects self.py_documented[mod_name] = documented_objects def _write_py_statistics(self, op: TextIO) -> None: """Outputs the table of ``op``.""" all_modules = frozenset(self.py_documented.keys() | self.py_undocumented.keys()) all_objects: Set[str] = set() all_documented_objects: Set[str] = set() for module in all_modules: all_objects |= self.py_documented[module] | self.py_undocumented[module] all_documented_objects |= self.py_documented[module] # prepare tabular table = [['Module', 'Coverage', 'Undocumented']] for module in sorted(all_modules): module_objects = self.py_documented[module] | self.py_undocumented[module] if len(module_objects): value = 100.0 * len(self.py_documented[module]) / len(module_objects) else: value = 100.0 table.append([module, '%.2f%%' % value, '%d' % len(self.py_undocumented[module])]) if all_objects: table.append([ 'TOTAL', f'{100 * len(all_documented_objects) / len(all_objects):.2f}%', f'{len(all_objects) - len(all_documented_objects)}', ]) else: table.append(['TOTAL', '100', '0']) for line in _write_table(table): op.write(f'{line}\n') def write_py_coverage(self) -> None: output_file = path.join(self.outdir, 'python.txt') failed = [] with open(output_file, 'w', encoding="utf-8") as op: if self.config.coverage_write_headline: write_header(op, 'Undocumented Python objects', '=') if self.config.coverage_statistics_to_stdout: self._write_py_statistics(sys.stdout) if self.config.coverage_statistics_to_report: write_header(op, 'Statistics') self._write_py_statistics(op) op.write('\n') keys = sorted(self.py_undoc.keys()) for name in keys: undoc = self.py_undoc[name] if 'error' in undoc: failed.append((name, undoc['error'])) else: if not undoc['classes'] and not undoc['funcs']: continue write_header(op, name) if undoc['funcs']: op.write('Functions:\n') op.writelines(' * %s\n' % x for x in undoc['funcs']) if self.config.coverage_show_missing_items: if self.app.quiet: for func in undoc['funcs']: logger.warning( __('undocumented python function: %s :: %s'), name, func) else: for func in undoc['funcs']: logger.info(red('undocumented ') + 'py ' + 'function ' + '%-30s' % func + red(' - in module ') + name) op.write('\n') if undoc['classes']: op.write('Classes:\n') for class_name, methods in sorted( undoc['classes'].items()): if not methods: op.write(' * %s\n' % class_name) if self.config.coverage_show_missing_items: if self.app.quiet: logger.warning( __('undocumented python class: %s :: %s'), name, class_name) else: logger.info(red('undocumented ') + 'py ' + 'class ' + '%-30s' % class_name + red(' - in module ') + name) else: op.write(' * %s -- missing methods:\n\n' % class_name) op.writelines(' - %s\n' % x for x in methods) if self.config.coverage_show_missing_items: if self.app.quiet: for meth in methods: logger.warning( __('undocumented python method:' ' %s :: %s :: %s'), name, class_name, meth) else: for meth in methods: logger.info(red('undocumented ') + 'py ' + 'method ' + '%-30s' % (class_name + '.' + meth) + red(' - in module ') + name) op.write('\n') if failed: write_header(op, 'Modules that failed to import') op.writelines(' * %s -- %s\n' % x for x in failed) def finish(self) -> None: # dump the coverage data to a pickle file too picklepath = path.join(self.outdir, 'undoc.pickle') with open(picklepath, 'wb') as dumpfile: pickle.dump((self.py_undoc, self.c_undoc, self.py_undocumented, self.py_documented), dumpfile) def setup(app: Sphinx) -> ExtensionMetadata: app.add_builder(CoverageBuilder) app.add_config_value('coverage_modules', (), '', types={tuple, list}) app.add_config_value('coverage_ignore_modules', [], '') app.add_config_value('coverage_ignore_functions', [], '') app.add_config_value('coverage_ignore_classes', [], '') app.add_config_value('coverage_ignore_pyobjects', [], '') app.add_config_value('coverage_c_path', [], '') app.add_config_value('coverage_c_regexes', {}, '') app.add_config_value('coverage_ignore_c_items', {}, '') app.add_config_value('coverage_write_headline', True, '') app.add_config_value('coverage_statistics_to_report', True, '', bool) app.add_config_value('coverage_statistics_to_stdout', True, '', bool) app.add_config_value('coverage_skip_undoc_in_source', False, '') app.add_config_value('coverage_show_missing_items', False, '') return {'version': sphinx.__display_version__, 'parallel_read_safe': True}