|  | # Copyright 2018 The Chromium Authors. All rights reserved. | 
|  | # Use of this source code is governed by a BSD-style license that can be | 
|  | # found in the LICENSE file. | 
|  |  | 
|  | import logging | 
|  | import os | 
|  | import re | 
|  | import struct | 
|  | import zipfile | 
|  |  | 
|  | # The default zipfile python module cannot open APKs properly, but this | 
|  | # fixes it. Note that simply importing this file is sufficient to | 
|  | # ensure that zip works correctly for all other modules. See: | 
|  | # http://bugs.python.org/issue14315 | 
|  | # https://hg.python.org/cpython/rev/6dd5e9556a60#l2.8 | 
def _PatchZipFile():
  """Make zipfile tolerate the broken 'extra' fields found in some APKs.

  Replaces the private zipfile.ZipInfo._decodeExtra() method with a wrapper
  that ignores any struct.error raised while decoding an entry's extra
  field, instead of letting it abort the read of the whole archive.

  Calling this function more than once is harmless: an already-installed
  wrapper is detected and left in place.
  """
  # pylint: disable=protected-access
  original_decode_extra = zipfile.ZipInfo._decodeExtra

  # Do not stack wrappers if this runs again (e.g. on module reload).
  if getattr(original_decode_extra, '_apk_utils_patched', False):
    return

  # Forward every argument untouched: this is a private method whose
  # signature is not stable across Python releases (newer CPython versions
  # call it with an additional argument), so a fixed (self) signature would
  # raise TypeError there.
  def decodeExtra(self, *args, **kwargs):
    try:
      return original_decode_extra(self, *args, **kwargs)
    except struct.error:
      # Deliberate best-effort: an undecodable extra field is not fatal.
      return None

  decodeExtra._apk_utils_patched = True
  zipfile.ZipInfo._decodeExtra = decodeExtra


_PatchZipFile()
|  |  | 
|  |  | 
class ApkZipInfo(object):
  """Models a single file entry from an ApkReader.

  This is very similar to the zipfile.ZipInfo class. It provides a few
  properties describing the entry:
     - filename          (same as ZipInfo.filename)
     - file_size         (same as ZipInfo.file_size)
     - compress_size     (same as ZipInfo.compress_size)
     - file_offset       (note: not provided by ZipInfo)

  And a few useful methods: IsCompressed() and IsElfFile().

  Entries can be created by using ApkReader() methods.
  """
  def __init__(self, zip_file, zip_info):
    """Construct instance. Do not call this directly. Use ApkReader methods.

    Args:
      zip_file: The open zipfile.ZipFile instance that contains the entry.
      zip_info: The zipfile.ZipInfo instance describing the entry.
    """
    self._file = zip_file
    self._info = zip_info
    # Computed lazily by the |file_offset| property, since it requires
    # reading the entry's local header from the archive.
    self._file_offset = None

  @property
  def filename(self):
    """Entry's file path within APK."""
    return self._info.filename

  @property
  def file_size(self):
    """Entry's extracted file size in bytes."""
    return self._info.file_size

  @property
  def compress_size(self):
    """Entry's compressed file size in bytes."""
    return self._info.compress_size

  @property
  def file_offset(self):
    """Entry's starting file offset in the APK (computed once, then cached)."""
    if self._file_offset is None:
      self._file_offset = self._ZipFileOffsetFromLocalHeader(
          self._file.fp, self._info.header_offset)
    return self._file_offset

  def __repr__(self):
    """Convert to string for debugging."""
    return 'ApkZipInfo["%s",size=0x%x,compressed=0x%x,offset=0x%x]' % (
        self.filename, self.file_size, self.compress_size, self.file_offset)

  def IsCompressed(self):
    """Returns True iff the entry is compressed."""
    return self._info.compress_type != zipfile.ZIP_STORED

  def IsElfFile(self):
    """Returns True iff the entry's content starts with the ELF magic."""
    with self._file.open(self._info, 'r') as f:
      # The stream yields bytes, so compare against a bytes literal. A plain
      # str literal never compares equal to bytes on Python 3.
      return f.read(4) == b'\x7fELF'

  @staticmethod
  def _ZipFileOffsetFromLocalHeader(fd, local_header_offset):
    """Return a file's start offset from its zip archive local header.

    Args:
      fd: Input file object (seekable, opened in binary mode).
      local_header_offset: Local header offset (from its ZipInfo entry).
    Returns:
      file start offset.
    Raises:
      struct.error if the header cannot be read completely.
    """
    FILE_NAME_LEN_OFFSET = 26
    FILE_NAME_OFFSET = 30
    fd.seek(local_header_offset + FILE_NAME_LEN_OFFSET)
    # The file name length and extra field length are two consecutive 16-bit
    # values. Zip archives are always little-endian, so be explicit about the
    # byte order instead of relying on the host's native one.
    file_name_len, extra_field_len = struct.unpack('<HH', fd.read(4))
    return (local_header_offset + FILE_NAME_OFFSET +
            file_name_len + extra_field_len)
|  |  | 
|  |  | 
|  | class ApkReader(object): | 
|  | """A convenience class used to read the content of APK files. | 
|  |  | 
|  | Its design is very similar to the one from zipfile.ZipFile, except | 
|  | that its returns ApkZipInfo entries which provide a |file_offset| | 
|  | property that can be used to know where a given file is located inside | 
|  | the archive. | 
|  |  | 
|  | It is also easy to mock for unit-testing (see MockApkReader in | 
|  | apk_utils_unittest.py) without creating any files on disk. | 
|  |  | 
|  | Usage is the following: | 
|  | - Create an instance using a with statement (for proper unit-testing). | 
|  | - Call ListEntries() to list all entries in the archive. This returns | 
|  | a list of ApkZipInfo entries. | 
|  | - Or call FindEntry() corresponding to a given path within the archive. | 
|  |  | 
|  | For example: | 
|  | with ApkReader(input_apk_path) as reader: | 
|  | info = reader.FindEntry('lib/armeabi-v7a/libfoo.so') | 
|  | if info.IsCompressed() or not info.IsElfFile(): | 
|  | raise Exception('Invalid library path") | 
|  |  | 
|  | The ApkZipInfo can be used to inspect the entry's metadata, or read its | 
|  | content with the ReadAll() method. See its documentation for all details. | 
|  | """ | 
|  | def __init__(self, apk_path): | 
|  | """Initialize instance.""" | 
|  | self._zip_file = zipfile.ZipFile(apk_path, 'r') | 
|  | self._path = apk_path | 
|  |  | 
|  | def __enter__(self): | 
|  | """Python context manager entry.""" | 
|  | return self | 
|  |  | 
|  | def __exit__(self, *kwargs): | 
|  | """Python context manager exit.""" | 
|  | self.Close() | 
|  |  | 
|  | @property | 
|  | def path(self): | 
|  | """The corresponding input APK path.""" | 
|  | return self._path | 
|  |  | 
|  | def Close(self): | 
|  | """Close the reader (and underlying ZipFile instance).""" | 
|  | self._zip_file.close() | 
|  |  | 
|  | def ListEntries(self): | 
|  | """Return a list of ApkZipInfo entries for this APK.""" | 
|  | result = [] | 
|  | for info in self._zip_file.infolist(): | 
|  | result.append(ApkZipInfo(self._zip_file, info)) | 
|  | return result | 
|  |  | 
|  | def FindEntry(self, file_path): | 
|  | """Return an ApkZipInfo instance for a given archive file path. | 
|  |  | 
|  | Args: | 
|  | file_path: zip file path. | 
|  | Return: | 
|  | A new ApkZipInfo entry on success. | 
|  | Raises: | 
|  | KeyError on failure (entry not found). | 
|  | """ | 
|  | info = self._zip_file.getinfo(file_path) | 
|  | return ApkZipInfo(self._zip_file, info) | 
|  |  | 
|  |  | 
|  |  | 
class ApkNativeLibraries(object):
  """A class for the list of uncompressed shared libraries inside an APK.

  Create a new instance by passing an ApkReader for the input APK, then use
  the FindLibraryByOffset() method to find the native shared library path
  corresponding to a given file offset.

  IsEmpty(), GetLibraries() and GetDumpList() can also be used to inspect
  the state of the instance.
  """
  def __init__(self, apk_reader):
    """Initialize instance.

    Args:
      apk_reader: An ApkReader instance corresponding to the input APK.
    """
    # List of (lib_path, start_offset, end_offset) tuples, in archive order.
    # |end_offset| is exclusive.
    self._native_libs = []
    for entry in apk_reader.ListEntries():
      # Chromium uses so-called 'placeholder' native shared libraries
      # that have a size of 0, and are only used to deal with bugs in
      # older Android system releases (they are never loaded and cannot
      # appear in stack traces). Ignore these here to avoid generating
      # confusing results.
      if entry.file_size == 0:
        continue

      # Only uncompressed libraries can appear in stack traces.
      if entry.IsCompressed():
        continue

      # Only consider files within lib/ and with a filename ending with .so
      # at the moment. NOTE: Do not require a 'lib' prefix, since that would
      # prevent finding the 'crazy.libXXX.so' libraries used by Chromium.
      if not (entry.filename.startswith('lib/') and
              entry.filename.endswith('.so')):
        continue

      start_offset = entry.file_offset
      self._native_libs.append(
          (entry.filename, start_offset, start_offset + entry.file_size))

  def IsEmpty(self):
    """Return true iff the list is empty."""
    return not self._native_libs

  def GetLibraries(self):
    """Return the sorted list of all library paths in this instance."""
    return sorted(lib[0] for lib in self._native_libs)

  def GetDumpList(self):
    """Retrieve full library map.

    Returns:
      A list of (lib_path, file_offset, file_size) tuples, sorted
      in increasing |file_offset| values.
    """
    result = [(lib_path, file_start, file_end - file_start)
              for lib_path, file_start, file_end in self._native_libs]
    # Use a key function rather than a cmp-style comparator: the latter is
    # not supported by Python 3's sorted(), and cmp() no longer exists there.
    return sorted(result, key=lambda item: item[1])

  def FindLibraryByOffset(self, file_offset):
    """Find the native library at a given file offset.

    Args:
      file_offset: File offset within the original APK.
    Returns:
      Returns a (lib_path, lib_offset) tuple on success, or (None, 0)
      on failure. |lib_path| is the entry's full path inside the APK
      (e.g. 'lib/<abi>/libfoo.so'), and |lib_offset| is |file_offset|
      made relative to the start of the library.
    """
    for lib_path, start_offset, end_offset in self._native_libs:
      if start_offset <= file_offset < end_offset:
        return (lib_path, file_offset - start_offset)

    return (None, 0)
|  |  | 
|  |  | 
class ApkLibraryPathTranslator(object):
  """Translates APK file paths + byte offsets into library path + offset.

  The purpose of this class is to translate a native shared library path
  that points to an APK into a new device-specific path that points to a
  native shared library, as if it was installed there. E.g.:

     ('/data/data/com.example.app-1/base.apk', 0x123be00)

  would be translated into:

     ('/data/data/com.example.app-1/base.apk!lib/libfoo.so', 0x3be00)

  If the original APK (installed as base.apk) contains an uncompressed shared
  library under lib/armeabi-v7a/libfoo.so at offset 0x1200000.

  Note that the virtual device path after the ! doesn't necessarily match
  the path inside the .apk. This doesn't really matter for the rest of
  the symbolization functions since only the file's base name can be used
  to find the corresponding file on the host.

  Usage is the following:

     1/ Create new instance.

     2/ Call AddHostApk() one or several times to register, for a given
        package name (and optional device-installed APK name), the
        ApkNativeLibraries instance describing the host APK.

     3/ Call TranslatePath() to translate a (path, offset) tuple corresponding
        to an on-device APK, into the corresponding virtual device library
        path and offset.
  """

  # Depending on the version of the system, a non-system APK might be installed
  # on a path that looks like the following:
  #
  #  * /data/..../<package_name>-<number>.apk, where <number> is used to
  #    distinguish several versions of the APK during package updates.
  #
  #  * /data/..../<package_name>-<suffix>/base.apk, where <suffix> is a
  #    string of random ASCII characters following the dash after the
  #    package name. This serves as a way to distinguish the installation
  #    paths during package update, and randomize its final location
  #    (to prevent apps from hard-coding the paths to other apps).
  #
  #    Note that the 'base.apk' name comes from the system.
  #
  #  * /data/.../<package_name>-<suffix>/<split_name>.apk, where <suffix>
  #    is the same as above, and <split_name> is the name of an app bundle
  #    split APK.
  #
  # System APKs are installed on paths that look like /system/app/Foo.apk
  # but this class ignores them intentionally.

  # Compiled regular expression for the first format above.
  _RE_APK_PATH_1 = re.compile(
      r'/data/.*/(?P<package_name>[A-Za-z0-9_.]+)-(?P<version>[0-9]+)\.apk')

  # Compiled regular expression for the second and third formats above.
  _RE_APK_PATH_2 = re.compile(
      r'/data/.*/(?P<package_name>[A-Za-z0-9_.]+)-(?P<suffix>[^/]+)/' +
      r'(?P<apk_name>.+\.apk)')

  def __init__(self):
    """Initialize instance. Call AddHostApk() to register host APKs."""
    # Kept for backward compatibility; not read anywhere in this class.
    self._path_map = {}
    # Maps '<package_name>/<device_apk_name>' keys to the native libraries
    # object passed to AddHostApk().
    self._libs_map = {}

  def AddHostApk(self, package_name, native_libs, device_apk_name=None):
    """Register the native libraries of a host APK.

    Args:
      package_name: Corresponding apk package name.
      native_libs: ApkNativeLibraries instance for the corresponding APK.
      device_apk_name: Optional expected name of the installed APK on the
        device. This is only useful when symbolizing app bundle that run on
        Android L+. I.e. it will be ignored in other cases.
    Raises:
      KeyError if an APK was already registered for the same package name
      and device APK name.
    """
    # If the APK name is not provided, use the default of 'base.apk'. This
    # will be ignored if we find <package_name>-<number>.apk file paths
    # in the input, but will work properly for Android L+, as long as we're
    # not using Android app bundles.
    device_apk_name = device_apk_name or 'base.apk'
    key = '%s/%s' % (package_name, device_apk_name)

    if native_libs.IsEmpty():
      # Log the full key so the message identifies the APK even when no
      # explicit device APK name was given.
      logging.debug('Ignoring host APK without any uncompressed native ' +
                    'libraries: %s', key)
      return

    if key in self._libs_map:
      raise KeyError('There is already an APK associated with (%s)' % key)

    self._libs_map[key] = native_libs

  @staticmethod
  def _MatchApkDeviceInstallPath(apk_path):
    """Check whether a given path matches an installed APK device file path.

    Args:
      apk_path: Device-specific file path.
    Returns:
      On success, a (package_name, apk_name) tuple. On failure, (None, None).
    """
    m = ApkLibraryPathTranslator._RE_APK_PATH_1.match(apk_path)
    if m:
      # The first format carries no APK name: map it to the default one
      # used by AddHostApk().
      return (m.group('package_name'), 'base.apk')

    m = ApkLibraryPathTranslator._RE_APK_PATH_2.match(apk_path)
    if m:
      return (m.group('package_name'), m.group('apk_name'))

    return (None, None)

  def TranslatePath(self, apk_path, apk_offset):
    """Translate a potential apk file path + offset into library path + offset.

    Args:
      apk_path: Library or apk file path on the device (e.g.
        '/data/data/com.example.app-XSAHKSJH/base.apk').
      apk_offset: Byte offset within the library or apk.

    Returns:
      a new (lib_path, lib_offset) tuple. If |apk_path| points to an APK,
      then this function searches inside the corresponding host-side APKs
      (added with AddHostApk() above) for the corresponding uncompressed
      native shared library at |apk_offset|, if found, this returns a new
      device-specific path corresponding to a virtual installation of said
      library with an adjusted offset.

      Otherwise, just return the original (apk_path, apk_offset) values.
    """
    if not apk_path.endswith('.apk'):
      return (apk_path, apk_offset)

    apk_package, apk_name = self._MatchApkDeviceInstallPath(apk_path)
    if not apk_package:
      return (apk_path, apk_offset)

    key = '%s/%s' % (apk_package, apk_name)
    native_libs = self._libs_map.get(key)
    # Only a missing entry means 'unknown'; do not rely on the truthiness
    # of the registered object.
    if native_libs is None:
      logging.debug('Unknown %s package', key)
      return (apk_path, apk_offset)

    lib_name, new_offset = native_libs.FindLibraryByOffset(apk_offset)
    if not lib_name:
      # |key| already ends with the APK file name, so do not append another
      # '.apk' to it.
      logging.debug('Invalid offset in %s package: %d', key, apk_offset)
      return (apk_path, apk_offset)

    lib_name = os.path.basename(lib_name)

    # Some libraries are stored with a crazy. prefix inside the APK, this
    # is done to prevent the PackageManager from extracting the libraries
    # at installation time when running on pre Android M systems, where the
    # system linker cannot load libraries directly from APKs.
    crazy_prefix = 'crazy.'
    if lib_name.startswith(crazy_prefix):
      lib_name = lib_name[len(crazy_prefix):]

    # Put this in a fictional lib sub-directory for good measure.
    new_path = '%s!lib/%s' % (apk_path, lib_name)

    return (new_path, new_offset)