blob: e4a4628eccf853d6f4d57f22917965167c523436 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Simple script used to generate the GNU hash table test data"""
import collections
import os
from pylib import source_utils
from pylib import elf_utils
script_name = os.path.basename(__file__)
def GnuHash(name):
"""Compute the GNU hash of a given input string."""
h = 5381
for c in name:
h = (h * 33 + ord(c)) & 0xffffffff
return h
class BloomFilter(object):
"""A class used to model the bloom filter used in GNU hash tables.
Usage is the following:
1) Create new instance.
2) Call Add() repeatedly to add new entries for each symbol hash value.
3) Call AsCSourceUint32Array() to generate a C source fragment
corresponding to the content of an array of 32-bit words for the
filter.
4) Also use __str__() to print a human-friendly representation of the
filter to check everything if needed.
"""
def __init__(self, bloom_size, bloom_shift, bloom_bits):
"""Create instance.
Args:
bloom_size: number of words in the bloom filter.
bloom_shift: bloom bit shift to use for secondary bit.
bloom_bits: number of bits in each filter word (32 or 64).
"""
self.bloom_size_ = bloom_size
self.bloom_shift_ = bloom_shift
self.bloom_bits_ = bloom_bits
self.bloom_filter_ = [0] * bloom_size
def GetBits(self, gnu_hash):
"""Return (index, bit0, bit1) tuple corresponding to a given hash."""
bloom_index = (gnu_hash / self.bloom_bits_) % self.bloom_size_
bloom_bit0 = (gnu_hash % self.bloom_bits_)
bloom_bit1 = ((gnu_hash >> self.bloom_shift_) % self.bloom_bits_)
return bloom_index, bloom_bit0, bloom_bit1
def Add(self, gnu_hash):
"""Add a new entry to the filter."""
word, bit0, bit1 = self.GetBits(gnu_hash)
self.bloom_filter_[word] |= (1 << bit0) | (1 << bit1)
def AsCSourceUint32Array(self):
"""Generate C source fragment for 32-bit uint array data."""
if self.bloom_bits_ == 64:
# Convert to array of 32-bit values first. Assume little-endianess.
values = []
for bloom in self.bloom_filter_:
values += [bloom & 0xffffffff, (bloom >> 32) % 0xffffffff]
else:
values = self.bloom_filter_
return source_utils.CSourceForIntegerHexArray(values, 32)
def __str__(self):
"""Convert bloom filter instance to human-friendly representation."""
out = 'Bloom filter (%d bits):\n' % self.bloom_bits_
out += 'bit#'
if self.bloom_bits_ == 64:
out += ' 56 48 40 32'
out += ' 24 16 8 0'
for bloom in self.bloom_filter_:
for n in range(self.bloom_bits_):
if (n % 8) == 0:
if n > 0:
out += ' '
else:
out += '\n '
if ((bloom & (1 << (self.bloom_bits_ - n - 1))) != 0):
out += 'x'
else:
out += '.'
out += '\n\n also as: '
for bloom in self.bloom_filter_:
if self.bloom_bits_ == 64:
out += ' 0x%016x' % bloom
else:
out += ' 0x%08x' % bloom
out += '\n'
return out
class GnuHashTable(object):
def __init__(self, sym_offset, num_buckets, bloom_size, bloom_shift,
bloom_bits, symbol_names):
"""Initialize a new GNU hash table instance.
Args:
sym_offset: Dynamic symbols offset, must be > 0.
num_buckets: Number of hash buckets, must be > 0.
bloom_size: Bloom filter size in words of |bloom_bits| bits.
bloom_shift: Bloom filter shift.
bloom_bits: Either 32 or 64, size of bloom filter words.
symbol_names: List of symbol names.
"""
self.num_buckets_ = num_buckets
self.sym_offset_ = sym_offset
self.bloom_size_ = bloom_size
self.bloom_shift_ = bloom_shift
# Create a list of (symbol, hash) values, sorted by increasing bucket index
sorted_symbols = sorted([(x, GnuHash(x)) for x in symbol_names],
key=lambda t: t[1] % num_buckets)
self.symbols_ = [t[0] for t in sorted_symbols]
self.hashes_ = [t[1] for t in sorted_symbols]
# Build bucket and chain arrays.
buckets = [0] * num_buckets
chain = [0] * len(sorted_symbols)
last_bucket_index = -1
for n, symbol in enumerate(sorted_symbols):
gnu_hash = self.hashes_[n]
bucket_index = gnu_hash % num_buckets
if bucket_index != last_bucket_index:
buckets[bucket_index] = n + sym_offset
last_bucket_index = bucket_index
if n > 0: chain[n - 1] |= 1
chain[n] = gnu_hash & ~1
if chain: chain[-1] |= 1
self.buckets_ = buckets
self.chain_ = chain
# Generate bloom filters for both 32 and 64 bits.
self.bloom_filter32_ = self._GenerateBloomFilter(32)
self.bloom_filter64_ = self._GenerateBloomFilter(64)
# Generate final string table and symbol offsets.
self.string_table_, self.symbol_offsets_ = \
elf_utils.GenerateStringTable(self.symbols_)
def _GenerateBloomFilter(self, bloom_bits):
"""Generate bloom filter array for a specific bitness."""
bloom = BloomFilter(self.bloom_size_, self.bloom_shift_, bloom_bits)
for gnu_hash in self.hashes_:
bloom.Add(gnu_hash)
return bloom
def __str__(self):
"""Human friendly text description for this table."""
out = 'GNU hash table: num_buckets=%d bloom_size=%d bloom_shift=%d\n\n' % (
self.num_buckets_, self.bloom_size_, self.bloom_shift_)
out += 'idx symbol hash bucket bloom32 bloom64 chain\n\n'
out += ' 0 ST_UNDEF\n'
for n, symbol in enumerate(self.symbols_):
gnu_hash = self.hashes_[n]
bucket_index = gnu_hash % self.num_buckets_
bloom32_index, bloom32_bit0, bloom32_bit1 = self.bloom_filter32_.GetBits(gnu_hash)
bloom64_index, bloom64_bit0, bloom64_bit1 = self.bloom_filter64_.GetBits(gnu_hash)
out += '%3d %-20s %08x %-3d %d:%02d:%02d %d:%02d:%02d %08x\n' % (
n + 1, symbol, gnu_hash, bucket_index, bloom32_index, bloom32_bit0,
bloom32_bit1, bloom64_index, bloom64_bit0, bloom64_bit1,
self.chain_[n])
out += '\nBuckets: '
comma = ''
for b in self.buckets_:
out += '%s%d' % (comma, b)
comma = ', '
out += '\n\n%s\n%s' % (self.bloom_filter32_, self.bloom_filter64_)
return out
def AsCSource(self, variable_prefix, guard_macro_name):
"""Dump the content of this instance."""
out = source_utils.CSourceBeginAutoGeneratedHeader(script_name,
guard_macro_name)
out += source_utils.CSourceForComments(str(self))
out += source_utils.CSourceForConstCharArray(
self.string_table_, 'k%sStringTable' % variable_prefix)
out += '\n'
out += elf_utils.CSourceForElfSymbolListMacro(variable_prefix,
self.symbols_,
self.symbol_offsets_)
out += '\n'
out += elf_utils.CSourceForElfSymbolTable(variable_prefix,
self.symbols_,
self.symbol_offsets_)
out += '\nstatic const uint32_t k%sHashTable[] = {\n' % variable_prefix
out += ' %d, // num_buckets\n' % self.num_buckets_
out += ' %d, // sym_offset\n' % self.sym_offset_
out += ' %d, // bloom_size\n' % self.bloom_size_
out += ' %d, // bloom_shift\n' % self.bloom_shift_
out += ' // Bloom filter words\n'
out += '#if UINTPTR_MAX == UINT32_MAX // 32-bit bloom filter words\n'
out += self.bloom_filter32_.AsCSourceUint32Array()
out += '#else // 64-bits filter bloom words (assumes little-endianess)\n'
out += self.bloom_filter64_.AsCSourceUint32Array()
out += '#endif // bloom filter words\n'
out += ' // Buckets\n'
out += source_utils.CSourceForIntegerHexArray(self.buckets_, 32)
out += ' // Chain\n'
out += source_utils.CSourceForIntegerHexArray(self.chain_, 32)
out += '};\n'
out += source_utils.CSourceEndAutoGeneratedHeader(script_name,
guard_macro_name)
return out
def main():
# Same data as the one found on the following web page, to ease verification:
#
# https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/
#
# NOTE: The bloom filter values and bitmaps on that page are widely incorrect
# but the bloom word and bit indices are ok though!
#
table = GnuHashTable(1, 4, 2, 5, 64, [
'cfsetispeed', 'strsigna', 'hcreate_', 'endrpcen', 'uselib',
'gettyen', 'umoun', 'freelocal', 'listxatt', 'isnan', 'isinf',
'setrlimi', 'getspen', 'pthread_mutex_lock', 'getopt_long_onl',
])
print table.AsCSource('TestGnu', 'CRAZY_LINKER_GNU_HASH_TABLE_TEST_DATA_H')
if __name__ == "__main__":
main()