| # Copyright 2018 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import logging |
| import os |
| import re |
| import struct |
| import zipfile |
| |
| # The default zipfile python module cannot open APKs properly, but this |
| # fixes it. Note that simply importing this file is sufficient to |
| # ensure that zip works correctly for all other modules. See: |
| # http://bugs.python.org/issue14315 |
| # https://hg.python.org/cpython/rev/6dd5e9556a60#l2.8 |
def _PatchZipFile():
    """Make zipfile tolerate the malformed 'extra' fields found in some APKs.

    Wraps the private zipfile.ZipInfo._decodeExtra method so that a
    struct.error raised while decoding an entry's extra field is ignored
    instead of aborting the parsing of the whole archive.

    The wrapper forwards any additional arguments unchanged, because newer
    CPython releases call _decodeExtra() with extra parameters (e.g. a
    filename CRC); a wrapper accepting only |self| would raise TypeError
    there and make every zip archive unreadable.
    """
    # pylint: disable=protected-access
    original_decode_extra = zipfile.ZipInfo._decodeExtra

    def decodeExtra(self, *args, **kwargs):
        try:
            return original_decode_extra(self, *args, **kwargs)
        except struct.error:
            # Deliberate best-effort: a truncated or otherwise undecodable
            # extra field is not needed to locate or read the entry.
            return None

    zipfile.ZipInfo._decodeExtra = decodeExtra


# Apply the patch at import time so that every user of zipfile benefits.
_PatchZipFile()
| |
| |
class ApkZipInfo(object):
    """Models a single file entry from an ApkReader.

    This is very similar to the zipfile.ZipInfo class. It provides a few
    properties describing the entry:
      - filename (same as ZipInfo.filename)
      - file_size (same as ZipInfo.file_size)
      - compress_size (same as ZipInfo.compress_size)
      - file_offset (note: not provided by ZipInfo)

    And a few useful methods: IsCompressed() and IsElfFile().

    Entries can be created by using ApkReader() methods.
    """

    def __init__(self, zip_file, zip_info):
        """Construct instance. Do not call this directly.

        Use ApkReader methods instead.

        Args:
          zip_file: The open zipfile.ZipFile the entry belongs to.
          zip_info: The zipfile.ZipInfo describing the entry.
        """
        self._file = zip_file
        self._info = zip_info
        # Computed lazily by |file_offset| since it requires reading the
        # archive's local file header.
        self._file_offset = None

    @property
    def filename(self):
        """Entry's file path within APK."""
        return self._info.filename

    @property
    def file_size(self):
        """Entry's extracted file size in bytes."""
        return self._info.file_size

    @property
    def compress_size(self):
        """Entry's compressed file size in bytes."""
        return self._info.compress_size

    @property
    def file_offset(self):
        """Entry's starting file offset in the APK.

        This is the offset of the entry's data (i.e. just past its local
        header), read from the archive on first access and cached.
        """
        if self._file_offset is None:
            self._file_offset = self._ZipFileOffsetFromLocalHeader(
                self._file.fp, self._info.header_offset)
        return self._file_offset

    def __repr__(self):
        """Convert to string for debugging."""
        return 'ApkZipInfo["%s",size=0x%x,compressed=0x%x,offset=0x%x]' % (
            self.filename, self.file_size, self.compress_size,
            self.file_offset)

    def IsCompressed(self):
        """Returns True iff the entry is compressed."""
        return self._info.compress_type != zipfile.ZIP_STORED

    def IsElfFile(self):
        """Returns True iff the entry's content starts with the ELF magic."""
        with self._file.open(self._info, 'r') as f:
            # The entry is read as bytes, so compare against a bytes literal;
            # a text literal never compares equal to bytes on Python 3.
            return f.read(4) == b'\x7fELF'

    @staticmethod
    def _ZipFileOffsetFromLocalHeader(fd, local_header_offset):
        """Return a file's start offset from its zip archive local header.

        Args:
          fd: Input file object. Its current position is modified.
          local_header_offset: Local header offset (from its ZipInfo entry).
        Returns:
          file start offset.
        Raises:
          struct.error if the header is truncated.
        """
        FILE_NAME_LEN_OFFSET = 26
        FILE_NAME_OFFSET = 30
        fd.seek(local_header_offset + FILE_NAME_LEN_OFFSET)
        # The file name length and extra field length are two consecutive
        # 16-bit fields. Zip archives are always little-endian, so do not
        # rely on the host's native byte order.
        file_name_len, extra_field_len = struct.unpack('<HH', fd.read(4))
        return (local_header_offset + FILE_NAME_OFFSET +
                file_name_len + extra_field_len)
| |
| |
| class ApkReader(object): |
| """A convenience class used to read the content of APK files. |
| |
| Its design is very similar to the one from zipfile.ZipFile, except |
| that its returns ApkZipInfo entries which provide a |file_offset| |
| property that can be used to know where a given file is located inside |
| the archive. |
| |
| It is also easy to mock for unit-testing (see MockApkReader in |
| apk_utils_unittest.py) without creating any files on disk. |
| |
| Usage is the following: |
| - Create an instance using a with statement (for proper unit-testing). |
| - Call ListEntries() to list all entries in the archive. This returns |
| a list of ApkZipInfo entries. |
| - Or call FindEntry() corresponding to a given path within the archive. |
| |
| For example: |
| with ApkReader(input_apk_path) as reader: |
| info = reader.FindEntry('lib/armeabi-v7a/libfoo.so') |
| if info.IsCompressed() or not info.IsElfFile(): |
| raise Exception('Invalid library path") |
| |
| The ApkZipInfo can be used to inspect the entry's metadata, or read its |
| content with the ReadAll() method. See its documentation for all details. |
| """ |
| def __init__(self, apk_path): |
| """Initialize instance.""" |
| self._zip_file = zipfile.ZipFile(apk_path, 'r') |
| self._path = apk_path |
| |
| def __enter__(self): |
| """Python context manager entry.""" |
| return self |
| |
| def __exit__(self, *kwargs): |
| """Python context manager exit.""" |
| self.Close() |
| |
| @property |
| def path(self): |
| """The corresponding input APK path.""" |
| return self._path |
| |
| def Close(self): |
| """Close the reader (and underlying ZipFile instance).""" |
| self._zip_file.close() |
| |
| def ListEntries(self): |
| """Return a list of ApkZipInfo entries for this APK.""" |
| result = [] |
| for info in self._zip_file.infolist(): |
| result.append(ApkZipInfo(self._zip_file, info)) |
| return result |
| |
| def FindEntry(self, file_path): |
| """Return an ApkZipInfo instance for a given archive file path. |
| |
| Args: |
| file_path: zip file path. |
| Return: |
| A new ApkZipInfo entry on success. |
| Raises: |
| KeyError on failure (entry not found). |
| """ |
| info = self._zip_file.getinfo(file_path) |
| return ApkZipInfo(self._zip_file, info) |
| |
| |
| |
class ApkNativeLibraries(object):
    """A class for the list of uncompressed shared libraries inside an APK.

    Create a new instance by passing an ApkReader for the input APK, then
    use the FindLibraryByOffset() method to find the native shared library
    path corresponding to a given file offset.

    IsEmpty(), GetLibraries() and GetDumpList() can also be used to inspect
    the state of the instance.
    """

    def __init__(self, apk_reader):
        """Initialize instance.

        Args:
          apk_reader: An ApkReader instance corresponding to the input APK.
        """
        # List of (lib_path, start_offset, end_offset) tuples, where the
        # offsets delimit the half-open range [start, end) inside the APK.
        self._native_libs = []
        for entry in apk_reader.ListEntries():
            # Chromium uses so-called 'placeholder' native shared libraries
            # that have a size of 0, and are only used to deal with bugs in
            # older Android system releases (they are never loaded and
            # cannot appear in stack traces). Ignore these here to avoid
            # generating confusing results.
            if entry.file_size == 0:
                continue

            # Only uncompressed libraries can appear in stack traces.
            if entry.IsCompressed():
                continue

            # Only consider files within lib/ and with a filename ending
            # with .so at the moment. NOTE: Do not require a 'lib' prefix,
            # since that would prevent finding the 'crazy.libXXX.so'
            # libraries used by Chromium.
            lib_path = entry.filename
            if not (lib_path.startswith('lib/') and
                    lib_path.endswith('.so')):
                continue

            start_offset = entry.file_offset
            self._native_libs.append(
                (lib_path, start_offset, start_offset + entry.file_size))

    def IsEmpty(self):
        """Return true iff the list is empty."""
        return not self._native_libs

    def GetLibraries(self):
        """Return the sorted list of all library paths in this instance."""
        return sorted(lib_path for lib_path, _, _ in self._native_libs)

    def GetDumpList(self):
        """Retrieve full library map.

        Returns:
          A list of (lib_path, file_offset, file_size) tuples, sorted
          in increasing |file_offset| values.
        """
        result = [(lib_path, start_offset, end_offset - start_offset)
                  for lib_path, start_offset, end_offset in self._native_libs]
        # Use a key function rather than a cmp function: the latter is not
        # supported by Python 3, while key= works with Python 2 as well.
        return sorted(result, key=lambda item: item[1])

    def FindLibraryByOffset(self, file_offset):
        """Find the native library at a given file offset.

        Args:
          file_offset: File offset within the original APK.
        Returns:
          Returns a (lib_path, lib_offset) tuple on success, or (None, 0)
          on failure. lib_path is the library's full path inside the APK
          (e.g. 'lib/armeabi-v7a/libfoo.so'), and lib_offset is the
          adjustment of file_offset within the library.
        """
        for lib_path, start_offset, end_offset in self._native_libs:
            if start_offset <= file_offset < end_offset:
                return (lib_path, file_offset - start_offset)

        return (None, 0)
| |
| |
| class ApkLibraryPathTranslator(object): |
| """Translates APK file paths + byte offsets into library path + offset. |
| |
| The purpose of this class is to translate a native shared library path |
| that points to an APK into a new device-specific path that points to a |
| native shared library, as if it was installed there. E.g.: |
| |
| ('/data/data/com.example.app-1/base.apk', 0x123be00) |
| |
| would be translated into: |
| |
| ('/data/data/com.example.app-1/base.apk!lib/libfoo.so', 0x3be00) |
| |
| If the original APK (installed as base.apk) contains an uncompressed shared |
| library under lib/armeabi-v7a/libfoo.so at offset 0x120000. |
| |
| Note that the virtual device path after the ! doesn't necessarily match |
| the path inside the .apk. This doesn't really matter for the rest of |
| the symbolization functions since only the file's base name can be used |
| to find the corresponding file on the host. |
| |
| Usage is the following: |
| |
| 1/ Create new instance. |
| |
| 2/ Call AddHostApk() one or several times to add the host path |
| of an APK, its package name, and device-installed named. |
| |
| 3/ Call TranslatePath() to translate a (path, offset) tuple corresponding |
| to an on-device APK, into the corresponding virtual device library |
| path and offset. |
| """ |
| |
| # Depending on the version of the system, a non-system APK might be installed |
| # on a path that looks like the following: |
| # |
| # * /data/..../<package_name>-<number>.apk, where <number> is used to |
| # distinguish several versions of the APK during package updates. |
| # |
| # * /data/..../<package_name>-<suffix>/base.apk, where <suffix> is a |
| # string of random ASCII characters following the dash after the |
| # package name. This serves as a way to distinguish the installation |
| # paths during package update, and randomize its final location |
| # (to prevent apps from hard-coding the paths to other apps). |
| # |
| # Note that the 'base.apk' name comes from the system. |
| # |
| # * /data/.../<package_name>-<suffix>/<split_name>.apk, where <suffix> |
| # is the same as above, and <split_name> is the name of am app bundle |
| # split APK. |
| # |
| # System APKs are installed on paths that look like /system/app/Foo.apk |
| # but this class ignores them intentionally. |
| |
| # Compiler regular expression for the first format above. |
| _RE_APK_PATH_1 = re.compile( |
| r'/data/.*/(?P<package_name>[A-Za-z0-9_.]+)-(?P<version>[0-9]+)\.apk') |
| |
| # Compiled regular expression for the second and third formats above. |
| _RE_APK_PATH_2 = re.compile( |
| r'/data/.*/(?P<package_name>[A-Za-z0-9_.]+)-(?P<suffix>[^/]+)/' + |
| r'(?P<apk_name>.+\.apk)') |
| |
| def __init__(self): |
| """Initialize instance. Call AddHostApk() to add host apk file paths.""" |
| self._path_map = {} # Maps (package_name, apk_name) to host-side APK path. |
| self._libs_map = {} # Maps APK host path to ApkNativeLibrariesMap instance. |
| |
| def AddHostApk(self, package_name, native_libs, device_apk_name=None): |
| """Add a file path to the host APK search list. |
| |
| Args: |
| package_name: Corresponding apk package name. |
| native_libs: ApkNativeLibraries instance for the corresponding APK. |
| device_apk_name: Optional expected name of the installed APK on the |
| device. This is only useful when symbolizing app bundle that run on |
| Android L+. I.e. it will be ignored in other cases. |
| """ |
| if native_libs.IsEmpty(): |
| logging.debug('Ignoring host APK without any uncompressed native ' + |
| 'libraries: %s', device_apk_name) |
| return |
| |
| # If the APK name is not provided, use the default of 'base.apk'. This |
| # will be ignored if we find <package_name>-<number>.apk file paths |
| # in the input, but will work properly for Android L+, as long as we're |
| # not using Android app bundles. |
| device_apk_name = device_apk_name or 'base.apk' |
| |
| key = "%s/%s" % (package_name, device_apk_name) |
| if key in self._libs_map: |
| raise KeyError('There is already an APK associated with (%s)' % key) |
| |
| self._libs_map[key] = native_libs |
| |
| @staticmethod |
| def _MatchApkDeviceInstallPath(apk_path): |
| """Check whether a given path matches an installed APK device file path. |
| |
| Args: |
| apk_path: Device-specific file path. |
| Returns: |
| On success, a (package_name, apk_name) tuple. On failure, (None. None). |
| """ |
| m = ApkLibraryPathTranslator._RE_APK_PATH_1.match(apk_path) |
| if m: |
| return (m.group('package_name'), 'base.apk') |
| |
| m = ApkLibraryPathTranslator._RE_APK_PATH_2.match(apk_path) |
| if m: |
| return (m.group('package_name'), m.group('apk_name')) |
| |
| return (None, None) |
| |
| def TranslatePath(self, apk_path, apk_offset): |
| """Translate a potential apk file path + offset into library path + offset. |
| |
| Args: |
| apk_path: Library or apk file path on the device (e.g. |
| '/data/data/com.example.app-XSAHKSJH/base.apk'). |
| apk_offset: Byte offset within the library or apk. |
| |
| Returns: |
| a new (lib_path, lib_offset) tuple. If |apk_path| points to an APK, |
| then this function searches inside the corresponding host-side APKs |
| (added with AddHostApk() above) for the corresponding uncompressed |
| native shared library at |apk_offset|, if found, this returns a new |
| device-specific path corresponding to a virtual installation of said |
| library with an adjusted offset. |
| |
| Otherwise, just return the original (apk_path, apk_offset) values. |
| """ |
| if not apk_path.endswith('.apk'): |
| return (apk_path, apk_offset) |
| |
| apk_package, apk_name = self._MatchApkDeviceInstallPath(apk_path) |
| if not apk_package: |
| return (apk_path, apk_offset) |
| |
| key = '%s/%s' % (apk_package, apk_name) |
| native_libs = self._libs_map.get(key) |
| if not native_libs: |
| logging.debug('Unknown %s package', key) |
| return (apk_path, apk_offset) |
| |
| lib_name, new_offset = native_libs.FindLibraryByOffset(apk_offset) |
| if not lib_name: |
| logging.debug('Invalid offset in %s.apk package: %d', key, apk_offset) |
| return (apk_path, apk_offset) |
| |
| lib_name = os.path.basename(lib_name) |
| |
| # Some libraries are stored with a crazy. prefix inside the APK, this |
| # is done to prevent the PackageManager from extracting the libraries |
| # at installation time when running on pre Android M systems, where the |
| # system linker cannot load libraries directly from APKs. |
| crazy_prefix = 'crazy.' |
| if lib_name.startswith(crazy_prefix): |
| lib_name = lib_name[len(crazy_prefix):] |
| |
| # Put this in a fictional lib sub-directory for good measure. |
| new_path = '%s!lib/%s' % (apk_path, lib_name) |
| |
| return (new_path, new_offset) |