| #!/usr/bin/env python3 |
| # |
| # Copyright 2023 The Cobalt Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| """Detects incorrect standard library uses in the Cobalt layer. |
| |
| It uses the 'nm' tool to look at the weak/undefined symbols |
| in the resulting cobalt shared library. Since everything should go through |
| Starboard, most weak or undefined symbols are "Starboard leaks" where we are |
| accidentally calling the system libraries when we shouldn't. |
| |
| This script will load a file that contains a list of Allowed C99 Symbols that |
| are not considered leaks. Additionally, it will also try to read in a list of |
| libraries to ignore when searching for leaks. These symbols and libraries cannot |
| be overridden and the only way to include them in the output is to remove them |
| from these files. |
| |
| This script can also make use of a manifest file: the set of currently known |
| leaking symbols for the build. This script can also be used to update the |
| manifest. --manifest can be used to output an updated manifest and the output |
| can be redirected to the build's manifest file. |
| |
| Exits with 1 if run in --submit-check mode and any leaks are either introduced |
| or removed compared with the manifest, otherwise exits with 0. |
| """ |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import argparse |
| import collections |
| import os |
| import re |
| import subprocess |
| import sys |
| |
| from starboard.tools import paths |
| |
# ASCII-art banner; main() prints it to stderr before doing any work. The art
# lines exceed the line-length limit, hence the pylint toggle around it.
# pylint: disable=line-too-long
_API_LEAK_DETECTOR_TITLE = """ ___ ____ ____ __ __ ____ __ __
/ | / __ \\/ _/ / / ___ ____ _/ /__ / __ \\___ / /____ _____/ /_____ _____
/ /| | / /_/ // / / / / _ \\/ __ `/ //_/ / / / / _ \\/ __/ _ \\/ ___/ __/ __ \\/ ___/
/ ___ |/ ____// / / /___/ __/ /_/ / ,< / /_/ / __/ /_/ __/ /__/ /_/ /_/ / /
/_/ |_/_/ /___/ /_____/\\___/\\__,_/_/|_| /_____/\\___/\\__/\\___/\\___/\\__/\\____/_/
"""
# pylint: enable=line-too-long
| |
# Leaks will be checked against this build. These are the defaults for the
# --platform, --config, --target and --sb_api_version command-line flags.

_DEFAULT_PLATFORM = 'evergreen-x64'
_DEFAULT_CONFIG = 'gold'
_DEFAULT_TARGET = 'cobalt'
_DEFAULT_SB_VERSION = 16

# Matches static library file names: starts with 'lib' and ends with '.a'.
_RE_LIB = re.compile(r'lib.*\.a$')
# Matches strings beginning with '//' whose extension consists only of the
# characters 'h', 'c' and 'p'.
# NOTE(review): not referenced by any code visible in this file - confirm
# whether it is still needed.
_RE_FILE = re.compile(r'\/\/.*\.[hcp]+$')
# Splits an nm symbol into name and version. For 'name@VER' or 'name@@VER',
# group 2 is the name and group 3 the version; for a symbol without '@', group 4
# is the whole symbol.
_RE_SYMBOL_AND_ANY_VERSION_INFO = re.compile(r'(([^@]+)@@?(.+))|([^@]+)')
# Matches a '* <symbol>' bullet line; group 1 is the text after the bullet.
_RE_ALLOWED_C99_SYMBOL = re.compile(r'^\*\s(.*)$')

# Manifests can be automatically generated by the tool but files for allowlisted
# libraries and C99 symbols should be edited by hand.
_LIBRARIES_TO_IGNORE_PATH = os.path.join(
    os.path.dirname(__file__), 'libraries_to_ignore')
_ALLOWED_C99_SYMBOLS_PATH = os.path.join(paths.STARBOARD_ROOT, 'doc', 'c99.md')
# Default for --relative-manifest-path; main() joins it onto this script's
# directory.
_DEFAULT_RELATIVE_MANIFEST_PATH = os.path.join('evergreen', 'manifest')

# Messages placed at the header of auto-generated files. Every line starts with
# '#', which LoadManifest() treats as a comment.
_MANIFEST_HEADER = ('# Manifest of Leaking Files\n\n' +
                    '# This file was auto-generated using ' +
                    'api_leak_detector.py.\n')

# Values to stand in for unknown libraries and source files, used for leaked
# symbols that no scanned static library references.
_UNKNOWN_LIBRARIES = 'unknown_library(ies)'
_UNKNOWN_SOURCE_FILES = 'unknown_source_file(s)'
| |
# Allowed POSIX symbols in Starboard 16.
#
# These are not reported as leaks when analyzing for Starboard version 16.
# Every entry must end with a comma: adjacent string literals are silently
# concatenated by Python, which would replace two real symbols with one bogus
# name.
_ALLOWED_SB16_POSIX_SYMBOLS = [
    'accept',
    'bind',
    'calloc',
    'connect',
    'clock_gettime',
    'close',
    'free',
    'freeifaddrs',
    'freeaddrinfo',
    'gettimeofday',
    'getifaddrs',
    'getaddrinfo',
    'gmtime_r',
    'inet_ntop',
    'listen',
    'malloc',
    'posix_memalign',
    'recv',
    'recvfrom',
    'realloc',
    'setsockopt',
    'send',
    'sendto',
    'strcasecmp',
    'strncasecmp',
    'socket',
    'time',
    'mmap',
    'munmap',
    'mprotect',
    'msync',
    'stat',
    'pthread_cond_broadcast',
    'pthread_cond_destroy',
    'pthread_cond_init',
    'pthread_cond_signal',
    'pthread_cond_timedwait',
    'pthread_cond_wait',
    'pthread_condattr_destroy',
    'pthread_condattr_getclock',
    'pthread_condattr_init',
    'pthread_condattr_setclock',
    'pthread_mutex_destroy',
    'pthread_mutex_init',
    'pthread_mutex_lock',
    'pthread_mutex_unlock',
    'pthread_mutex_trylock',
    'pthread_once',
    'mkdir',
]
| |
| |
def DiffWithManifest(leaked_symbols, manifest_path):
  """Compares the detected leaks against the manifest of known leaks.

  Args:
    leaked_symbols: The set of symbols currently leaking.
    manifest_path: Path of the manifest file to compare against.

  Returns:
    A tuple (introduced, removed): the symbols that leak but are missing from
    the manifest, and the manifest symbols that no longer leak.
  """
  known_symbols = LoadManifest(manifest_path)
  return (leaked_symbols.difference(known_symbols),
          known_symbols.difference(leaked_symbols))
| |
| |
def FindLibraries(config_path):
  """Collects the static libraries of a build that should be scanned.

  Args:
    config_path: The path to the build config directory.

  Returns:
    A list of paths to every 'lib*.a' file found in config_path itself or
    anywhere below its top-level 'obj' directory, excluding the file names
    returned by LoadLibrariesToIgnore().
  """
  print('Loading libraries to ignore...', file=sys.stderr)
  ignored_names = LoadLibrariesToIgnore()

  found = []
  for current_dir, subdirs, entries in os.walk(config_path):
    if current_dir == config_path:
      # Prune in place so that os.walk only descends into the toplevel 'obj'
      # directory.
      subdirs[:] = [name for name in subdirs if name == 'obj']
    found.extend(
        os.path.join(current_dir, entry)
        for entry in entries
        if entry not in ignored_names and _RE_LIB.match(entry))
  return found
| |
| |
def FindLeakingSourceFiles(leaking_symbols, nm_output, config_dir):
  r"""Maps each leaking symbol to the source files that reference it.

  The nm output of a static library is a sequence of object-file header lines,
  each followed by the undefined symbols of that object file. This function
  walks the output while remembering the most recent header; every symbol that
  is one of the leaks is attributed to the source file derived from that
  header.

  A header such as:

    /usr/local/google/home/chadduffin/cobalt/src/out/ \
        evergreen-x64-sbversion-12_gold/obj/third_party/boringssl/src/crypto/ \
        crypto.thread.c.o

  is reduced to:

    //third_party/boringssl/src/crypto/thread.c

  that is, everything up to and including '/<config_dir>/obj/' is replaced by
  '//', the trailing '.o:' is dropped and the '<target>.' prefix of the file
  name is removed. Headers ending in '.asm.o:' only lose the trailing ':'.

  Args:
    leaking_symbols: A collection of all of the symbols that are valid leaks.
    nm_output: The output from 'nm -u' being ran against a static library.
    config_dir: The name of the config build directory.

  Returns:
    A dictionary that maps symbols to a set of filenames that are leaking the
    symbol.

  Raises:
    IndexError: If a line mentions config_dir but not '/<config_dir>/obj/'. The
      offending line is printed before the error propagates.
  """
  obj_marker = f'/{config_dir}/obj/'
  symbol_to_files = {}
  current_file = None
  for entry in ProcessNmOutput(nm_output, True):
    if config_dir not in entry:
      # A symbol line: record it only once a header was seen and if it is one
      # of the leaks we are looking for.
      if current_file and entry and entry in leaking_symbols:
        symbol_to_files.setdefault(entry, set()).add(current_file)
      continue

    # An object-file header line: derive the source file name from it.
    try:
      current_file = '//' + entry.split(obj_marker)[1]
    except IndexError:
      print(entry)
      raise
    if current_file.endswith('.asm.o:'):
      current_file = re.sub(r':$', '', current_file)
    else:
      current_file = re.sub(r'\.o:$', '', current_file)
      current_file = re.sub(r'/\w+\.', '/', current_file)
  return symbol_to_files
| |
| |
def InversedKeys(nested_dict):
  """Swaps the two key levels of a dict of dicts.

  Args:
    nested_dict: A dict mapping outer keys to dicts of inner key -> value.

  Returns:
    A new collections.defaultdict(dict) in which result[inner][outer] equals
    nested_dict[outer][inner]. The input is not modified.
  """
  swapped = collections.defaultdict(dict)
  for outer_key in nested_dict:
    for inner_key, leaf in nested_dict[outer_key].items():
      swapped[inner_key][outer_key] = leaf
  return swapped
| |
| |
def FindLeakLocations(leaked_symbols, config_path):
  """Locates the static libraries and source files that use leaked symbols.

  Each library returned by FindLibraries() is inspected with 'nm -u'. Leaked
  symbols that none of those libraries reference are reported under
  placeholder library and source-file names.

  Args:
    leaked_symbols: The set of leaked symbols found in the Cobalt layer.
    config_path: The path to the build config directory.

  Returns:
    A dict from leaked symbol to dict from static library to the set of source
    files using the leak.
  """
  config_dir = os.path.basename(config_path)
  files_by_lib = {}
  symbols_by_lib = {}

  print('Collecting static libraries...', file=sys.stderr)
  libs = FindLibraries(config_path)

  print('Searching the static libraries for leaks...', file=sys.stderr)
  for lib in libs:
    print(lib)
    libname = os.path.basename(lib)
    nm_output = RunCommand(['nm', '-u', lib])
    matched = {
        symbol for symbol in ProcessNmOutput(nm_output)
        if symbol in leaked_symbols
    }
    if not matched:
      # Results are keyed by basename, so also discard what an earlier library
      # with the same basename may have recorded.
      symbols_by_lib.pop(libname, None)
      continue
    symbols_by_lib[libname] = matched

    sources = FindLeakingSourceFiles(matched, nm_output, config_dir)
    if sources:
      files_by_lib[libname] = sources
    else:
      files_by_lib.pop(libname, None)

  # It's possible for something that is not a Cobalt static library to
  # contribute leaked symbols. For example, some static libraries come
  # preinstalled on Linux.
  attributed = set().union(*symbols_by_lib.values())
  unattributed = leaked_symbols.difference(attributed)
  if unattributed:
    files_by_lib[_UNKNOWN_LIBRARIES] = {
        symbol: {_UNKNOWN_SOURCE_FILES} for symbol in unattributed
    }

  return InversedKeys(files_by_lib)
| |
| |
def LoadAllowedC99Symbols():
  """Reads the allowed C99 symbols from the file at _ALLOWED_C99_SYMBOLS_PATH.

  Only lines that, once stripped, match _RE_ALLOWED_C99_SYMBOL contribute; all
  other lines are skipped.

  Returns:
    The set of symbol names captured from the matching lines.
  """
  with open(_ALLOWED_C99_SYMBOLS_PATH, encoding='utf-8') as c99_doc:
    matches = (_RE_ALLOWED_C99_SYMBOL.match(raw.strip()) for raw in c99_doc)
    return {found.group(1) for found in matches if found}
| |
| |
def LoadManifest(manifest_path):
  """Reads the symbols listed in a manifest file.

  Args:
    manifest_path: Path of the manifest file, read as UTF-8.

  Returns:
    The set of stripped, non-empty lines that do not start with '#'.
  """
  with open(manifest_path, encoding='utf-8') as manifest:
    stripped_lines = (raw.strip() for raw in manifest)
    return {
        entry for entry in stripped_lines
        if entry and not entry.startswith('#')
    }
| |
| |
def LoadLibrariesToIgnore():
  """Returns the file names of the static libraries to skip.

  'libstarboard_platform.a' is always part of the result. The remaining names
  come from the file at _LIBRARIES_TO_IGNORE_PATH, one per line; blank lines
  and lines starting with '#' are skipped. If that file cannot be read, the
  error is reported on stderr and whatever was collected so far is returned.

  Returns:
    The set of library file names to ignore.
  """
  # We should *always* ignore libstarboard_platform.a.
  ignored = {'libstarboard_platform.a'}

  try:
    with open(_LIBRARIES_TO_IGNORE_PATH, 'r', encoding='utf-8') as ignore_file:
      for raw in ignore_file:
        entry = raw.strip()
        if entry and not entry.startswith('#'):
          ignored.add(entry)
  except IOError as e:
    print(f"Error opening {_LIBRARIES_TO_IGNORE_PATH}: '{e}'", file=sys.stderr)
    print('Continuing...', file=sys.stderr)
  return ignored
| |
| |
def ParseArgs():
  """Parses the command line.

  Exactly one of the mode flags --submit-check, --inspect or --manifest has to
  be given; the remaining options fall back to the module-level defaults.

  Returns:
    The argparse.Namespace holding the parsed arguments.
  """
  parser = argparse.ArgumentParser()

  # The operating modes exclude each other and one of them is mandatory.
  mode_group = parser.add_mutually_exclusive_group(required=True)
  mode_flags = (
      ('--submit-check',
       'Checks if any leaked symbols are entirely introduced or removed.'),
      ('--inspect', 'Outputs all currently leaking symbols and their uses.'),
      ('--manifest', 'Outputs all currently leaking symbols.'),
  )
  for flag, description in mode_flags:
    mode_group.add_argument(flag, action='store_true', help=description)

  parser.add_argument(
      '-p',
      '--platform',
      help="Device platform, eg 'evergreen-x64-sbversion-12'.",
      default=_DEFAULT_PLATFORM)
  parser.add_argument(
      '-c',
      '--config',
      help="Build config (eg, 'gold','qa' or 'devel')",
      default=_DEFAULT_CONFIG)
  parser.add_argument(
      '-t',
      '--target',
      help="Build target to check (eg, 'cobalt' or 'd8')",
      default=_DEFAULT_TARGET)
  parser.add_argument(
      '--relative-manifest-path',
      help='Path to the manifest to use, relative to api_leak_detector dir.',
      default=_DEFAULT_RELATIVE_MANIFEST_PATH)
  parser.add_argument(
      '--sb_api_version',
      type=int,
      help='The Starboard version',
      default=_DEFAULT_SB_VERSION)

  return parser.parse_args()
| |
| |
def PrettyPrint(output, indent=0, inc=2, file=sys.stderr):
  """Alternative to pprint that produces better output for our use case.

  Dicts are printed one key per line, in sorted key order, with each value
  printed below its key at a deeper indentation. Keys of the outermost dict
  (indent 0) are preceded by a blank line. Sets and frozensets are printed one
  sorted element per line; anything else is printed as a single line.

  Args:
    output: The (possibly nested) value to print.
    indent: The number of spaces to put in front of every line of this level.
    inc: The number of spaces added to the indentation per nesting level.
    file: The stream to write to. It is used for all nesting levels.
  """
  pad = ' ' * indent
  if isinstance(output, dict):
    # Only the outermost level separates its entries with a blank line.
    lead = '\n' if indent == 0 else ''
    for key in sorted(output):
      print(f'{lead}{pad}{key}', file=file)
      # Forward inc and file so that nested levels honor the caller's choices
      # instead of silently falling back to the defaults.
      PrettyPrint(output[key], indent + inc, inc, file)
  elif isinstance(output, (set, frozenset)):
    for value in sorted(output):
      print(f'{pad}{value}', file=file)
  else:
    print(f'{pad}{output}', file=file)
| |
| |
def ProcessNmOutput(nm_output, collect_files=False):
  """Parses the 'nm' output and yields the symbol names it lists.

  Every line is stripped of leading whitespace and split on single spaces.
  Lines made of exactly two tokens are treated as '<type> <symbol>' entries:
  the symbol is yielded with any '@VERSION' / '@@VERSION' suffix removed. Lines
  with any other number of tokens are skipped, except that single-token lines
  (including empty ones) are yielded verbatim when collect_files is set.

  Args:
    nm_output: The raw bytes output from a previously ran 'nm -u' command. It
      is decoded as UTF-8.
    collect_files: Identifies whether or not to include single-token lines in
      the results. FindLeakingSourceFiles uses this to see the object-file
      header lines in between the symbols.

  Yields:
    Unresolved symbols that match _RE_SYMBOL_AND_ANY_VERSION_INFO, without
    their version information and, if collect_files is set, the single-token
    lines.
  """
  for raw_line in nm_output.decode('utf-8').splitlines():
    line = raw_line.lstrip()
    contents = line.split(' ')
    if len(contents) != 2:
      if len(contents) == 1 and collect_files:
        yield contents[0]
      continue

    # The pattern does not match an empty token or one starting with '@', in
    # which case match() returns None. Report such lines instead of crashing.
    match = _RE_SYMBOL_AND_ANY_VERSION_INFO.match(contents[1])
    if match is None:
      print(f'Invalid line in nm output: {line}', file=sys.stderr)
      continue

    # Group 4 is set for symbols without version information, group 2 holds
    # the bare name of versioned ones. A successful match fills one of them.
    versioned_name, unversioned_name = match.group(2, 4)
    yield unversioned_name or versioned_name
| |
| |
def RunCommand(args):
  """Runs a command and hands back what it wrote to stdout.

  Args:
    args: The command and its arguments, as accepted by
      subprocess.check_output.

  Returns:
    The captured standard output of the command, as bytes.

  Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero status.
  """
  captured_stdout = subprocess.check_output(args)
  return captured_stdout
| |
| |
def main():
  """Builds the target, collects its leaked symbols and reports on them.

  The requested target is built with ninja, the undefined symbols of the
  resulting binary are listed with 'nm -u -C', and every symbol that is neither
  an allowed C99 symbol, a Starboard symbol, nor (for Starboard 16) an allowed
  POSIX symbol counts as a leak. What happens with the leaks depends on the
  selected mode:

    --manifest: prints a fresh manifest to stdout.
    --submit-check: compares the leaks against the manifest file.
    --inspect: prints every leak along with the places it is used.

  Returns:
    1 in --submit-check mode if leaks were introduced or removed compared with
    the manifest, otherwise 0.
  """
  args = ParseArgs()
  script_dir = os.path.dirname(__file__)
  config_dir = f'{args.platform}_{args.config}'
  config_path = os.path.join(paths.BUILD_OUTPUT_ROOT, config_dir)
  manifest_path = os.path.join(script_dir, args.relative_manifest_path)

  print(_API_LEAK_DETECTOR_TITLE, file=sys.stderr)
  print(
      'Analyzing for Starboard version ', args.sb_api_version, file=sys.stderr)
  print('Loading allowed C99 symbols...', file=sys.stderr)
  allowed_c99_symbols = LoadAllowedC99Symbols()

  print(f'Building {config_dir} if necessary...', file=sys.stderr)
  RunCommand(['ninja', '-j256', '-C', config_path, args.target])

  # Candidate locations of the shared library, in order of preference: the GYP
  # path 'lib/libcobalt.so' first, then the GN path 'libcobalt.so'.
  # TODO(b/211885836): stop considering the GYP shared library path once all
  # relevant GYP builders are removed from CI.
  shared_library_name = f'lib{args.target}.so'
  shared_library_candidates = (
      os.path.join(config_path, 'lib', shared_library_name),
      os.path.join(config_path, shared_library_name),
  )
  # If neither exists, fall back on the executable path 'cobalt', used by both
  # GYP and GN.
  binary_path = next(
      (path for path in shared_library_candidates if os.path.exists(path)),
      os.path.join(config_path, args.target))
  print(f'Analyzing: {binary_path}', file=sys.stderr)

  # Get all of the unresolved symbols of the binary ('-C' demangles C++
  # symbols).
  nm_output = RunCommand(['nm', '-u', '-C', binary_path])

  # The POSIX allowlist only applies when analyzing for Starboard 16.
  if args.sb_api_version == 16:
    allowed_posix_symbols = _ALLOWED_SB16_POSIX_SYMBOLS
  else:
    allowed_posix_symbols = ()

  def IsAllowedSymbol(symbol):
    """Tells whether a symbol is allowlisted or belongs to Starboard."""
    return (symbol in allowed_c99_symbols or
            symbol.startswith(('Sb', 'kSb')) or
            symbol in allowed_posix_symbols)

  # Everything that is left once the allowlisted and Starboard functions and
  # extern configuration variables are filtered out is a leak.
  leaked_symbols = {
      symbol for symbol in ProcessNmOutput(nm_output)
      if not IsAllowedSymbol(symbol)
  }

  if args.manifest:
    print('Done!', file=sys.stderr)
    print(_MANIFEST_HEADER)
    PrettyPrint(leaked_symbols, file=sys.stdout)
    return 0

  if args.submit_check:
    introduced, removed = DiffWithManifest(leaked_symbols, manifest_path)

    if not introduced:
      print('\nNo leaks were introduced.', file=sys.stderr)
    else:
      PrettyPrint(
          {'Leaks introduced:': FindLeakLocations(introduced, config_path)})
      print(
          '\nPlease see advice for addressing new leaks at go/cobalt-api-leaks.'
      )

    if not removed:
      print('No leaks were removed.', file=sys.stderr)
    else:
      PrettyPrint({'Leaks removed:': removed})
      print('\nPlease delete removed leaks from the manifest file.')

    # We counterintuitively still want to fail even if leaks are only removed
    # so that a CL author knows to update the manifest.
    if introduced or removed:
      return 1
    return 0

  if args.inspect:
    if not leaked_symbols:
      print('No leaks found!', file=sys.stderr)
    else:
      PrettyPrint(FindLeakLocations(leaked_symbols, config_path))
    return 0
| |
| |
| if __name__ == '__main__': |
| sys.exit(main()) |