#!/usr/bin/env python3
#
# Copyright 2023 The Cobalt Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Detects incorrect standard library uses in the Cobalt layer.

It uses the 'nm' tool to look at the weak/undefined symbols
in the resulting cobalt shared library. Since everything should go through
Starboard, most weak or undefined symbols are "Starboard leaks" where we are
accidentally calling the system libraries when we shouldn't.

This script will load a file that contains a list of Allowed C99 Symbols that
are not considered leaks. Additionally, it will also try to read in a list of
libraries to ignore when searching for leaks. These symbols and libraries cannot
be overridden and the only way to include them in the output is to remove them
from these files.

This script can also make use of a manifest file: the set of currently known
leaking symbols for the build. This script can also be used to update the
manifest. --manifest can be used to output an updated manifest and the output
can be redirected to the build's manifest file.

Exits with 1 if run in --submit-check mode and any leaks are either introduced
or removed compared with the manifest, otherwise exits with 0.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import collections
import os
import re
import subprocess
import sys
from starboard.tools import paths
# ASCII-art banner that main() prints to stderr before doing any work. The
# backslashes are doubled because this is a regular (non-raw) string literal.
# pylint: disable=line-too-long
_API_LEAK_DETECTOR_TITLE = """ ___ ____ ____ __ __ ____ __ __
/ | / __ \\/ _/ / / ___ ____ _/ /__ / __ \\___ / /____ _____/ /_____ _____
/ /| | / /_/ // / / / / _ \\/ __ `/ //_/ / / / / _ \\/ __/ _ \\/ ___/ __/ __ \\/ ___/
/ ___ |/ ____// / / /___/ __/ /_/ / ,< / /_/ / __/ /_/ __/ /__/ /_/ /_/ / /
/_/ |_/_/ /___/ /_____/\\___/\\__,_/_/|_| /_____/\\___/\\__/\\___/\\___/\\__/\\____/_/
"""
# pylint: enable=line-too-long
# Leaks will be checked against this build. These are the defaults for the
# --platform, --config, --target and --sb_api_version command-line options.
_DEFAULT_PLATFORM = 'evergreen-x64'
_DEFAULT_CONFIG = 'gold'
_DEFAULT_TARGET = 'cobalt'
_DEFAULT_SB_VERSION = 16
# Static library file names: start with 'lib' and end with '.a'.
_RE_LIB = re.compile(r'lib.*\.a$')
# '//'-prefixed path ending in a dot followed by one or more of the letters
# h, c, p. NOTE(review): not referenced in the visible part of this file.
_RE_FILE = re.compile(r'\/\/.*\.[hcp]+$')
# Splits an nm symbol from optional version info. With an '@' or '@@' suffix,
# group 2 is the bare symbol and group 3 the version; without one, group 4 is
# the whole symbol.
_RE_SYMBOL_AND_ANY_VERSION_INFO = re.compile(r'(([^@]+)@@?(.+))|([^@]+)')
# A line of the form '* <symbol>'; group 1 is everything after the '* '.
_RE_ALLOWED_C99_SYMBOL = re.compile(r'^\*\s(.*)$')
# Manifests can be automatically generated by the tool but files for allowlisted
# libraries and C99 symbols should be edited by hand.
_LIBRARIES_TO_IGNORE_PATH = os.path.join(
    os.path.dirname(__file__), 'libraries_to_ignore')
_ALLOWED_C99_SYMBOLS_PATH = os.path.join(paths.STARBOARD_ROOT, 'doc', 'c99.md')
# Joined onto this script's directory in main() to locate the manifest.
_DEFAULT_RELATIVE_MANIFEST_PATH = os.path.join('evergreen', 'manifest')
# Messages placed at the header of auto-generated files.
_MANIFEST_HEADER = ('# Manifest of Leaking Files\n\n' +
                    '# This file was auto-generated using ' +
                    'api_leak_detector.py.\n')
# Values to stand in for unknown libraries and source files.
_UNKNOWN_LIBRARIES = 'unknown_library(ies)'
_UNKNOWN_SOURCE_FILES = 'unknown_source_file(s)'
# Allowed POSIX symbols in Starboard 16. A symbol listed here is not reported
# as a leak when the tool runs with --sb_api_version 16.
#
# Every entry must end with a comma: adjacent string literals are silently
# concatenated by Python, which would merge two symbols into one bogus name
# and leave both real symbols off the allowlist.
_ALLOWED_SB16_POSIX_SYMBOLS = [
    'accept',
    'bind',
    'calloc',
    'connect',
    'clock_gettime',
    'close',
    'free',
    'freeifaddrs',
    'freeaddrinfo',
    'gettimeofday',
    'getifaddrs',
    'getaddrinfo',
    'gmtime_r',
    'inet_ntop',
    'listen',
    'malloc',
    'posix_memalign',
    'recv',
    'recvfrom',
    'realloc',
    'setsockopt',
    'send',
    'sendto',
    'strcasecmp',
    'strncasecmp',
    'socket',
    'time',
    'mmap',
    'munmap',
    'mprotect',
    'msync',
    'stat',
    'pthread_cond_broadcast',
    'pthread_cond_destroy',
    'pthread_cond_init',
    'pthread_cond_signal',
    'pthread_cond_timedwait',
    'pthread_cond_wait',
    'pthread_condattr_destroy',
    'pthread_condattr_getclock',
    'pthread_condattr_init',
    'pthread_condattr_setclock',
    'pthread_mutex_destroy',
    'pthread_mutex_init',
    'pthread_mutex_lock',
    'pthread_mutex_unlock',
    'pthread_mutex_trylock',
    'pthread_once',
    'mkdir',
]
def DiffWithManifest(leaked_symbols, manifest_path):
  """Compares the current leaks against those recorded in a manifest file.

  Args:
    leaked_symbols: The set of symbols currently leaking from the build.
    manifest_path: Path of the manifest file listing the known leaks.

  Returns:
    A tuple (introduced, removed): the symbols in leaked_symbols that the
    manifest does not list, and the manifest symbols that no longer leak.
  """
  known = LoadManifest(manifest_path)
  return leaked_symbols.difference(known), known.difference(leaked_symbols)
def FindLibraries(config_path):
  """Collects the static libraries under the build config's obj directory.

  Args:
    config_path: The path to the build config directory.

  Returns:
    A list of paths to every file under <config_path>/obj (and directly in
    config_path) whose name matches _RE_LIB and is not an ignored library.
  """
  print('Loading libraries to ignore...', file=sys.stderr)
  ignored = LoadLibrariesToIgnore()

  found = []
  for root, dirs, filenames in os.walk(config_path):
    if root == config_path:
      # Prune in place so that os.walk only descends into the toplevel obj
      # directory.
      dirs[:] = [d for d in dirs if d == 'obj']
    found.extend(
        os.path.join(root, name)
        for name in filenames
        if _RE_LIB.match(name) and name not in ignored)
  return found
def FindLeakingSourceFiles(leaking_symbols, nm_output, config_dir):
  """Maps each leaking symbol to the source files that reference it.

  The nm output is scanned in order. A line containing config_dir is treated
  as an object-file header and updates the "current file"; any other line
  that names one of the leaking symbols is attributed to the current file.

  Object-file paths are shortened before being recorded. For example

    <...>/out/evergreen-x64_gold/obj/third_party/boringssl/src/crypto/
    crypto.thread.c.o:

  is recorded as

    //third_party/boringssl/src/crypto/thread.c

  Paths ending in '.asm.o:' only lose the trailing colon.

  Args:
    leaking_symbols: A collection of the symbols that are valid leaks.
    nm_output: The output from 'nm -u' run against a static library.
    config_dir: Name of the build config directory.

  Returns:
    A dictionary that maps symbols to the set of filenames leaking the symbol.

  Raises:
    IndexError: If a header line does not contain '/<config_dir>/obj/'.
  """
  obj_marker = f'/{config_dir}/obj/'
  symbol_to_files = {}
  current_file = None
  for line in ProcessNmOutput(nm_output, True):
    if config_dir not in line:
      # A symbol line: only counts once a file header has been seen.
      if current_file and line and line in leaking_symbols:
        symbol_to_files.setdefault(line, set()).add(current_file)
      continue

    # A file header line: keep only what follows the obj directory.
    try:
      current_file = '//' + line.split(obj_marker)[1]
    except IndexError:
      print(line)
      raise
    if current_file.endswith('.asm.o:'):
      current_file = re.sub(r':$', '', current_file)
    else:
      # Drop the '.o:' suffix, then the '<target>.' prefix of the basename.
      current_file = re.sub(r'\.o:$', '', current_file)
      current_file = re.sub(r'/\w+\.', '/', current_file)
  return symbol_to_files
def InversedKeys(nested_dict):
  """Swaps the two key levels of a dict of dicts.

  Args:
    nested_dict: A dict whose values are themselves dicts.

  Returns:
    A collections.defaultdict(dict) in which result[inner][outer] holds the
    value found at nested_dict[outer][inner].
  """
  swapped = collections.defaultdict(dict)
  for row_key, row in nested_dict.items():
    for column_key, cell in row.items():
      swapped[column_key][row_key] = cell
  return swapped
def FindLeakLocations(leaked_symbols, config_path):
  """Locates the static libraries and source files that use leaked APIs.

  Every non-ignored static library of the build is inspected with 'nm -u'.
  Leaked symbols that no library accounts for are filed under placeholder
  "unknown" library and source file names.

  Args:
    leaked_symbols: The set of leaked symbols found in the Cobalt layer.
    config_path: The path to the build config directory.

  Returns:
    A dict from leaked symbol to dict from static library to the set of source
    files using the leak.
  """
  config_dir = os.path.basename(config_path)

  print('Collecting static libraries...', file=sys.stderr)
  libs = FindLibraries(config_path)

  print('Searching the static libraries for leaks...', file=sys.stderr)
  lib_to_files = {}
  lib_to_symbols = {}
  for lib in libs:
    print(lib)
    libname = os.path.basename(lib)
    nm_output = RunCommand(['nm', '-u', lib])

    symbols = {s for s in ProcessNmOutput(nm_output) if s in leaked_symbols}
    if not symbols:
      # Libraries are keyed by basename, so make sure no entry survives for
      # this name when the library contributes nothing.
      lib_to_symbols.pop(libname, None)
      continue
    lib_to_symbols[libname] = symbols

    sources = FindLeakingSourceFiles(symbols, nm_output, config_dir)
    if sources:
      lib_to_files[libname] = sources
    else:
      lib_to_files.pop(libname, None)

  # It's possible for something that is not a Cobalt static library to
  # contribute leaked symbols. For example, some static libraries come
  # preinstalled on Linux.
  accounted_for = set().union(*lib_to_symbols.values())
  unaccounted_for = leaked_symbols.difference(accounted_for)
  if unaccounted_for:
    lib_to_files[_UNKNOWN_LIBRARIES] = {
        symbol: {_UNKNOWN_SOURCE_FILES} for symbol in unaccounted_for
    }
  return InversedKeys(lib_to_files)
def LoadAllowedC99Symbols():
  """Reads the allowed C99 symbols from the file at _ALLOWED_C99_SYMBOLS_PATH.

  Only lines that, once stripped, match _RE_ALLOWED_C99_SYMBOL contribute a
  symbol; everything else in the file is skipped.

  Returns:
    The set of symbol names captured from the matching lines.
  """
  with open(_ALLOWED_C99_SYMBOLS_PATH, encoding='utf-8') as f:
    matches = (_RE_ALLOWED_C99_SYMBOL.match(raw.strip()) for raw in f)
    return {m.group(1) for m in matches if m}
def LoadManifest(manifest_path):
  """Reads the set of known leaking symbols from a manifest file.

  Args:
    manifest_path: Path of the manifest file to read.

  Returns:
    The set of stripped, non-empty lines that do not start with '#'.
  """
  with open(manifest_path, encoding='utf-8') as manifest:
    entries = (raw.strip() for raw in manifest)
    return {entry for entry in entries if entry and not entry.startswith('#')}
def LoadLibrariesToIgnore():
  """Returns the names of the static libraries to skip when locating leaks.

  The names come from the file at _LIBRARIES_TO_IGNORE_PATH, one per line;
  blank lines and lines starting with '#' are skipped. If the file cannot be
  opened or read, the error is reported on stderr and whatever was collected
  so far is returned.

  Returns:
    A set of library file names, always including 'libstarboard_platform.a'.
  """
  # We should *always* ignore libstarboard_platform.a.
  ignored = {'libstarboard_platform.a'}

  # Read in the ignored libraries.
  try:
    with open(_LIBRARIES_TO_IGNORE_PATH, 'r', encoding='utf-8') as f:
      names = (raw.strip() for raw in f)
      ignored.update(name for name in names if name and name[0] != '#')
  except IOError as e:
    print(f"Error opening {_LIBRARIES_TO_IGNORE_PATH}: '{e}'", file=sys.stderr)
    print('Continuing...', file=sys.stderr)
  return ignored
def ParseArgs():
  """Parses the command line of the leak detector.

  Returns:
    The argparse namespace with the selected mode (submit_check, inspect or
    manifest) and the platform, config, target, relative_manifest_path and
    sb_api_version options.
  """
  parser = argparse.ArgumentParser()

  # Exactly one of the mode flags has to be given.
  mode = parser.add_mutually_exclusive_group(required=True)
  mode_flags = (
      ('--submit-check',
       'Checks if any leaked symbols are entirely introduced or removed.'),
      ('--inspect', 'Outputs all currently leaking symbols and their uses.'),
      ('--manifest', 'Outputs all currently leaking symbols.'),
  )
  for flag, description in mode_flags:
    mode.add_argument(flag, action='store_true', help=description)

  parser.add_argument(
      '-p',
      '--platform',
      help="Device platform, eg 'evergreen-x64-sbversion-12'.",
      default=_DEFAULT_PLATFORM)
  parser.add_argument(
      '-c',
      '--config',
      help="Build config (eg, 'gold','qa' or 'devel')",
      default=_DEFAULT_CONFIG)
  parser.add_argument(
      '-t',
      '--target',
      default=_DEFAULT_TARGET,
      help="Build target to check (eg, 'cobalt' or 'd8')")
  parser.add_argument(
      '--relative-manifest-path',
      default=_DEFAULT_RELATIVE_MANIFEST_PATH,
      help='Path to the manifest to use, relative to api_leak_detector dir.')
  parser.add_argument(
      '--sb_api_version',
      type=int,
      default=_DEFAULT_SB_VERSION,
      help='The Starboard version')

  parsed = parser.parse_args()
  return parsed
def PrettyPrint(output, indent=0, inc=2, file=sys.stderr):
  """Alternative to pprint that produces better output for our use case.

  Dicts are printed one key per line in sorted order, each followed by its
  value printed one level deeper. Sets are printed one element per line in
  sorted order. Anything else is printed as a single line.

  Args:
    output: The dict, set or other value to print.
    indent: Number of spaces to put in front of every line at this level.
    inc: Number of spaces each nesting level adds to the indentation.
    file: The stream to write to.
  """
  pad = ' ' * indent
  if isinstance(output, dict):
    # Top-level keys are separated from what precedes them by a blank line.
    lead = '\n' if indent == 0 else ''
    for key, value in sorted(output.items()):
      print(f'{lead}{pad}{key}', file=file)
      # Forward inc and file so nested levels honor the caller's choices
      # instead of falling back to the defaults.
      PrettyPrint(value, indent + inc, inc, file)
  elif isinstance(output, set):
    for value in sorted(output):
      print(f'{pad}{value}', file=file)
  else:
    print(f'{pad}{output}', file=file)
def ProcessNmOutput(nm_output, collect_files=False):
  """Parses 'nm' output and yields the symbol names it lists.

  After stripping leading whitespace, a line made of exactly two
  space-separated fields is treated as '<type> <symbol>'; any version
  information attached to the symbol with '@' or '@@' is dropped. A line with
  a single field is treated as a file name and is only yielded when
  collect_files is set. All other lines are skipped.

  Args:
    nm_output: The bytes output from a previously run 'nm -u' command.
    collect_files: Identifies whether or not to include files in the results.
      This increases the reusability of the function (see
      FindLeakingSourceFiles).

  Yields:
    Unresolved symbols that match _RE_SYMBOL_AND_ANY_VERSION_INFO and, when
    collect_files is set, the single-field lines in between.
  """
  for line in nm_output.decode('utf-8').splitlines():
    line = line.lstrip()
    contents = line.split(' ')
    if len(contents) != 2:
      if len(contents) == 1 and collect_files:
        yield contents[0]
      continue

    # The pattern fails to match an empty symbol or one starting with '@'.
    # Report such a line and move on instead of crashing on a None match.
    match = _RE_SYMBOL_AND_ANY_VERSION_INFO.match(contents[1])
    if not match:
      print(f'Invalid line in nm output: {line}', file=sys.stderr)
      continue

    # Group 4 is set when the symbol has no associated version information,
    # group 2 when it does; a successful match always fills one of them.
    yield match.group(4) or match.group(2)
def RunCommand(args):
  """Runs a command and hands back what it wrote to standard output.

  Args:
    args: The command and its arguments, as accepted by
      subprocess.check_output.

  Returns:
    The captured standard output of the command.

  Raises:
    subprocess.CalledProcessError: If the command exits with a non-zero code.
  """
  output = subprocess.check_output(args)
  return output
def main():
  """Builds the target, collects its leaked symbols and reports on them.

  What gets reported depends on the selected mode:
    --manifest: prints a manifest of all leaked symbols to stdout.
    --submit-check: reports the leaks introduced or removed relative to the
      manifest file.
    --inspect: reports every leaked symbol with the libraries and source
      files using it.

  Returns:
    1 when --submit-check finds introduced or removed leaks, otherwise 0.
  """
  args = ParseArgs()

  config_dir = f'{args.platform}_{args.config}'
  config_path = os.path.join(paths.BUILD_OUTPUT_ROOT, config_dir)
  manifest_path = os.path.join(
      os.path.dirname(__file__), args.relative_manifest_path)

  print(_API_LEAK_DETECTOR_TITLE, file=sys.stderr)
  print(
      'Analyzing for Starboard version ', args.sb_api_version, file=sys.stderr)

  print('Loading allowed C99 symbols...', file=sys.stderr)
  allowed_c99_symbols = LoadAllowedC99Symbols()

  print(f'Building {config_dir} if necessary...', file=sys.stderr)
  RunCommand(['ninja', '-j256', '-C', config_path, args.target])

  # Candidate locations of the shared library, in order of preference: the GYP
  # path 'lib/libcobalt.so' first, then the GN path 'libcobalt.so'.
  # TODO(b/211885836): stop considering the GYP shared library path once all
  # relevant GYP builders are removed from CI.
  library_name = f'lib{args.target}.so'
  shared_library_paths = (
      os.path.join(config_path, 'lib', library_name),
      os.path.join(config_path, library_name),
  )
  # If neither exists, fall back on the executable path 'cobalt', used by both
  # GYP and GN.
  binary_path = next(
      (path for path in shared_library_paths if os.path.exists(path)),
      os.path.join(config_path, args.target))
  print(f'Analyzing: {binary_path}', file=sys.stderr)

  # Get all of the unresolved symbols of the binary.
  nm_output = RunCommand([
      'nm',
      '-u',
      '-C',  # Demangle C++ symbols
      binary_path
  ])

  # POSIX symbols are only allowed for Starboard version 16.
  if args.sb_api_version == 16:
    allowed_posix_symbols = _ALLOWED_SB16_POSIX_SYMBOLS
  else:
    allowed_posix_symbols = ()

  def IsAllowedSymbol(symbol):
    """True for C99-allowed, Starboard ('Sb'/'kSb') and allowed POSIX names."""
    return (symbol in allowed_c99_symbols or
            symbol.startswith(('Sb', 'kSb')) or
            symbol in allowed_posix_symbols)

  # Build a set from all of the unresolved symbols that have the allowlisted
  # and Starboard functions and extern configuration variables filtered out.
  leaked_symbols = {
      symbol for symbol in ProcessNmOutput(nm_output)
      if not IsAllowedSymbol(symbol)
  }

  if args.manifest:
    print('Done!', file=sys.stderr)
    print(_MANIFEST_HEADER)
    PrettyPrint(leaked_symbols, file=sys.stdout)
    return 0

  if args.submit_check:
    introduced, removed = DiffWithManifest(leaked_symbols, manifest_path)

    if not introduced:
      print('\nNo leaks were introduced.', file=sys.stderr)
    else:
      PrettyPrint(
          {'Leaks introduced:': FindLeakLocations(introduced, config_path)})
      print(
          '\nPlease see advice for addressing new leaks at go/cobalt-api-leaks.'
      )

    if not removed:
      print('No leaks were removed.', file=sys.stderr)
    else:
      PrettyPrint({'Leaks removed:': removed})
      print('\nPlease delete removed leaks from the manifest file.')

    # We counterintuitively still want to fail even if leaks are only removed
    # so that a CL author knows to update the manifest.
    if introduced or removed:
      return 1
    return 0

  if args.inspect:
    if not leaked_symbols:
      print('No leaks found!', file=sys.stderr)
    else:
      PrettyPrint(FindLeakLocations(leaked_symbols, config_path))
  return 0
# Only run the detector when this file is executed as a script; the value
# returned by main() is passed to sys.exit() as the process exit status.
if __name__ == '__main__':
  sys.exit(main())