src/third_party/brotli/research/brotlidump.py - cobalt - Git at Google

 #!python3
 """Program to dump contents of Brotli compressed files showing the compression format.
 Jurjen N.E. Bos, 2016.
 I found the following issues with the Brotli format:
 - The distance alphabet has size 16+(48<<POSTFIX),
   but the last symbols are useless.
   It could be lowered to 16+(44-POSTFIX<<POSTFIX), and this could matter.
 - The block type code is useless if NBLTYPES==2, you would only need 1 symbol
   anyway, so why don't you just switch to "the other" type?
 """
 import struct
 from operator import itemgetter, methodcaller
 from itertools import accumulate, repeat
 from collections import defaultdict, deque
 from functools import partial

 #Exception for streams that violate the format; raised by the alphabet
 #classes below (for instance on a zero high nibble in MLEN)
 class InvalidStream(Exception): pass
 #lookup table: short names for the three strings "literal",
 #"insert&copy" and "distance", and the same strings with a 'P' in front
 #NOTE(review): the 'P' variants are not used in this part of the file;
 #their meaning should be confirmed at the place where they are used
 L, I, D = "literal", "insert&copy", "distance"
 pL, pI, pD = 'P'+L, 'P'+I, 'P'+D

 def outputCharFormatter(c):
     """Show character in readable format.
     c is a byte value (an int).
     Printable ASCII above the space is returned unchanged,
     newline and carriage return as backslash escapes,
     the space as a quoted space, and everything else as \\xNN.
     """
     #TODO 2: allow hex only output
     if 32<c<127: return chr(c)
     elif c==10: return '\\n'
     elif c==13: return '\\r'
     elif c==32: return '" "'
     else: return '\\x{:02x}'.format(c)

 def outputFormatter(s):
     """Show string or char.
     s is a sequence of byte values (such as a bytes object).
     Spaces are shown as plain spaces; all other bytes go through
     outputCharFormatter.
     Input of 200 bytes or more is abbreviated to the first 100 and the
     last 100 bytes with '...' in between.
     """
     def formatSubString(s):
         for c in s:
             if c==32: yield ' '
             else: yield outputCharFormatter(c)
     #decide on the length of the input; the old test looked at an
     #always-empty local, so long input was never abbreviated
     if len(s)<200: return ''.join(formatSubString(s))
     else:
         return ''.join(formatSubString(s[:100]))+'...'+ \
                ''.join(formatSubString(s[-100:]))


 class BitStream:
     """Bit level view on a bytes object.
     Bits are taken from each byte starting at the least significant bit,
     and the current position is kept as a bit count in pos.
     """
     def __init__(self, byteString):
         #pos counts bits: the byte index is pos>>3, the bit index is pos&7
         self.pos = 0
         self.data = byteString

     def __repr__(self):
         """Show the position as byte index (in hex) and bit index.
         >>> olleke
         BitStream(pos=0:0)
         """
         byteIndex, bitIndex = divmod(self.pos, 8)
         return "BitStream(pos={:x}:{})".format(byteIndex, bitIndex)

     def read(self, n):
         """Read n bits from the stream, advance the position,
         and return the bits as an integer.
         Raises ValueError when the new position lies beyond the data;
         the position has been advanced already when that happens.
         >>> olleke.data[0]==27
         True
         >>> olleke.read(5)
         27

         >>> olleke
         BitStream(pos=0:5)
         """
         result = self.peek(n)
         self.pos += n
         if self.pos<=8*len(self.data):
             return result
         raise ValueError('Read past end of stream')

     def peek(self, n):
         """Return the next n bits as an integer; the position stays put.
         Bits beyond the end of the data read as zero, this is no error.
         >>> olleke.data[:2]==b'\x1b\x2e' and 0x2e1b==11803
         True
         >>> olleke.peek(15)
         11803
         >>> hex(olleke.peek(32))
         '0x2e1b'
         """
         #the bytes that hold the requested bits
         firstByte = self.pos>>3
         endByte = (self.pos+n+7)>>3
         #little endian, so the lowest byte supplies the lowest bits
         window = int.from_bytes(self.data[firstByte:endByte], 'little')
         #drop the bits of the first byte that were consumed already
         window >>= self.pos&7
         #keep only the n bits that were asked for
         return window & ((1<<n)-1)

     def readBytes(self, n):
         """Read n bytes from the stream on a byte boundary.
         Raises ValueError if the position is not a multiple of 8 bits.
         """
         if self.pos%8!=0:
             raise ValueError('readBytes: need byte boundary')
         start = self.pos//8
         chunk = self.data[start:start+n]
         self.pos += 8*n
         return chunk

 #-----------------------Symbol-------------------------------------------
 class Symbol:
     """One symbol of a code.
     code is the code (alphabet) that the symbol belongs to,
     index is the place of the symbol in that alphabet.
     Nearly all methods forward to the code, passing the index.
     """
     __slots__ = 'code', 'index'
     def __init__(self, code, value):
         self.code, self.index = code, value

     def __repr__(self):
         return 'Symbol({}, {})'.format(self.code.name, self.index)

     def __len__(self):
         """Number of bits in the prefix notation of this symbol
         """
         code = self.code
         return code.length(self.index)

     def __int__(self):
         return self.index

     #the routines below call the routine of the same name in the code
     def bitPattern(self):
         """Value of the symbol in the stream
         """
         code = self.code
         return code.bitPattern(self.index)

     def extraBits(self):
         """Number of extra bits to read for this symbol
         """
         code = self.code
         return code.extraBits(self.index)

     def __str__(self):
         """Short descriptor of the symbol without extra bits.
         """
         code = self.code
         return code.mnemonic(self.index)

     #extra bits are required for WithExtra codes, forbidden otherwise
     def value(self, extra=None):
         """The value used for processing. Can be a tuple.
         For a WithExtra code, extra must fit in the extra bits of the
         symbol; for any other code, extra must be left out.
         Raises ValueError when either rule is broken.
         """
         code = self.code
         if not isinstance(code, WithExtra):
             if extra is None:
                 return code.value(self.index)
             raise ValueError('value: no extra bits for this code')
         if extra<0 or extra>=1<<self.extraBits():
             raise ValueError("value: extra value doesn't fit in extraBits")
         return code.value(self.index, extra)

     def explanation(self, extra=None):
         """Long explanation of the value, produced by the callback
         of the code. The extra bits are passed on for WithExtra codes only.
         Used by Layout.verboseRead when printing the value
         """
         if isinstance(self.code, WithExtra):
             arguments = self, extra
         else:
             arguments = self,
         return self.code.callback(*arguments)

 #========================Code definitions==================================
 class RangeDecoder:
     """Decoder for the Code class for symbols that are stored
     as plain binary numbers of a fixed width.
     Everything follows from the "alphabetSize" property:
     the indices run from 0 to alphabetSize-1.
     This is the default decoder.
     """
     def __init__(self, *, alphabetSize=None, bitLength=None, **args):
         #a given bitLength takes precedence over alphabetSize
         size = alphabetSize if bitLength is None else 1<<bitLength
         if size is None: return
         self.alphabetSize = size
         #bits needed to write the largest index
         self.maxLength = (size-1).bit_length()

     def __len__(self):
         return self.alphabetSize

     def __iter__(self):
         """Produce all symbols.
         """
         return (Symbol(self, index) for index in range(len(self)))

     def __getitem__(self, index):
         if index<self.alphabetSize:
             return Symbol(self, index)
         raise ValueError('index out of range')

     def bitPattern(self, index):
         #binary notation, zero padded to maxLength digits
         return format(index, '0{}b'.format(self.maxLength))

     def length(self, index):
         """Encoding length of given symbol.
         All symbols have the same length here, whatever the index.
         """
         return self.maxLength

     def decodePeek(self, data):
         """Find the symbol that matches the given data
         (a number, as obtained from peek).
         Returns the number of bits decoded and the symbol.
         Can also be used to figure out length of a symbol.
         """
         width = self.maxLength
         mask = (1<<width)-1
         return width, Symbol(self, data&mask)

 class PrefixDecoder:
     """Decoder for the Code class that uses a prefix code.
     The code is held in two tables:
     decodeTable[p] is the symbol index that belongs to bit pattern p,
     lengthTable[index] is the number of bits of that bit pattern.
     Use setDecode(decodeTable) to switch the decoder from the default
     to a prefix decoder, or pass decodeTable at init.
     You can also use setLength(lengthTable)
     to define the encoding from the lengths.
     The set of symbol values does not need to be consecutive.
     """
     def __init__(self, *, decodeTable=None, **args):
         if decodeTable is None: return
         self.setDecode(decodeTable)

     def __len__(self):
         return len(self.decodeTable)

     def __iter__(self):
         """Produce all symbols, sorted on the reversed bit pattern.
         """
         ordered = sorted(
             self.decodeTable.values(),
             key=lambda index: self.bitPattern(index)[::-1])
         return (Symbol(self, index) for index in ordered)

     def __getitem__(self, index):
         if index in self.lengthTable:
             return Symbol(self, index)
         raise ValueError('No symbol {}[{}]'.format(
             self.__class__.__name__, index))

     def bitPattern(self, index):
         #first bit pattern in decodeTable that maps to this symbol
         candidates = (
             pattern
             for pattern, symbol in self.decodeTable.items()
             if symbol==index
             )
         bits = next(candidates)
         return '{:0{}b}'.format(bits, self.length(index))

     def length(self, index):
         """Encoding length of given symbol.
         """
         return self.lengthTable[index]

     def decodePeek(self, data):
         """Find the symbol that matches the given data
         (a number, as obtained from peek).
         Returns the number of bits decoded and the symbol.
         Can also be used to figure out length of a symbol.
         """
         #binary search for the word length
         #invariant: low<=length<=high
         low, high = self.minLength, self.maxLength
         while low<=high:
             guess = (low+high)>>1
             #see what we get when the length is assumed to be guess
             key = data&((1<<guess)-1)
             if key not in self.decodeTable:
                 #too many bits specified, reduce estimated length
                 high = guess-1
                 continue
             index = self.decodeTable[key]
             #a symbol was found, but there could be a longer match
             found = self.lengthTable[index]
             if found<=guess:
                 #all bits match, symbol must be right
                 return found, Symbol(self, index)
             #there must be more bits to match
             low = guess+1
         return low, Symbol(self, index)

     #routines to set up the tables
     def setDecode(self, decodeTable):
         """Store decodeTable,
         and derive lengthTable, minLength, maxLength from the encodings.
         """
         self.decodeTable = decodeTable
         #bit patterns whose length is not known yet
         pending = set(decodeTable)
         lengths = {}
         #number of bits under investigation
         maskLength = 0
         while pending:
             mask = (1<<maskLength)-1
             #group the remaining patterns on their lowest maskLength bits
             groups = defaultdict(list)
             for pattern in pending:
                 groups[pattern&mask].append(pattern)
             #a group of one means the pattern is maskLength bits long:
             #record the length and take the pattern off the pending set
             for key, members in groups.items():
                 if len(members)!=1: continue
                 lengths[self.decodeTable[key]] = maskLength
                 pending.remove(key)
             #next round uses one more bit
             maskLength += 1
         #save result
         self.lengthTable = lengths
         self.minLength = min(lengths.values())
         self.maxLength = max(lengths.values())
         self.switchToPrefix()

     def setLength(self, lengthTable):
         """Given the bit pattern lengths for symbols given in lengthTable,
         set decodeTable, minLength, maxLength
         """
         self.lengthTable = lengthTable
         self.minLength = min(lengthTable.values())
         self.maxLength = max(lengthTable.values())
         #how many symbols there are of each length
         counts = defaultdict(int)
         for length in lengthTable.values():
             counts[length] += 1
         #the codes are computed backwards first and reversed afterwards
         #firstCodes[n] is the first (backwards) code of n bits
         firstCodes = []
         code = 0
         for bits in range(self.maxLength+1):
             code <<= 1
             firstCodes.append(code)
             code += counts[bits]
         #hand out the codes in symbol order, stored reversed in the table
         table = self.decodeTable = {}
         for symbol in sorted(lengthTable):
             bits = lengthTable[symbol]
             backwards = '{:0{}b}'.format(firstCodes[bits], bits)
             table[int(backwards[::-1], 2)] = symbol
             firstCodes[bits] += 1
         self.switchToPrefix()

     def switchToPrefix(self):
         """This routine makes sure the prefix decoder is activated.
         """
         self.mode = PrefixDecoder

 class Code(RangeDecoder, PrefixDecoder):
     """An alphabet of symbols, that can be read from a stream.
     If you use setDecode or setLength, you have a prefix code,
     otherwise you have a range code.
     Features:
     code[index] produces symbol with given index
     value(index): value of symbol
     mnemonic(index): short description of symbol
     explanation(index): show meaning of symbol, shown in Layout.verboseRead
     iter(code): produce all symbols in some order
     name: show as context in Layout.verboseRead
     """
     name = '?'
     #callback is a function that gets the symbol and the extra bits
     #default callback calls explanation
     def __init__(self, name=None, *, callback=None, description='', **args):
         """Don't forget to set either alphabetSize or decodeTable.
         name: name of the code; the class variable is used if omitted
         callback: replacement for the callback method, if given
         description: text used by the default explanation
         bitLength or alphabetSize in args select the range decoder,
         decodeTable in args selects the prefix decoder.
         """
         #set name when provided, otherwise take class variable
         if name is not None: self.name = name
         if callback is not None: self.callback = callback
         self.description = description
         #mode switch
         if 'bitLength' in args or 'alphabetSize' in args:
             self.mode = RangeDecoder
             RangeDecoder.__init__(self, **args)
         elif 'decodeTable' in args:
             self.mode = PrefixDecoder
             PrefixDecoder.__init__(self, **args)
         else:
             #no decoder chosen yet: mode stays unset until
             #setDecode or setLength is called
             super().__init__(**args)

     def __repr__(self):
         return self.__class__.__name__+' '+self.name

     #the routines that get switched between RangeDecoder and PrefixDecoder
     def __len__(self): return self.mode.__len__(self)
     def __iter__(self): return self.mode.__iter__(self)
     def __getitem__(self, index): return self.mode.__getitem__(self, index)
     def bitPattern(self, index): return self.mode.bitPattern(self, index)
     def length(self, index): return self.mode.length(self, index)
     def decodePeek(self, data): return self.mode.decodePeek(self, data)
     #general routines
     def value(self, index, extra=None):
         """Get value of symbol for computations.
         Override where needed.
         Raises ValueError if extra is given: this code has no extra bits.
         """
         if extra is not None:
             raise ValueError('value: no extra for this symbol')
         return index

     def mnemonic(self, index):
         """Give mnemonic of symbol.
         Override where needed.
         """
         return str(self.value(index))

     def callback(self, symbol):
         """Default callback: the explanation of the index of the symbol.
         """
         return self.explanation(symbol.index)

     def explanation(self, index):
         """Long explanation of the value from the numeric value
         This is a default routine.
         You can customize in three ways:
         - set description to add some text
         - override to get more control
         - set callback to make it dependent on you local variables
         """
         value = self.value(index)
         return '{0}{1}: {2}'.format(
             self.description and self.description+': ',
             self.bitPattern(index),
             value,
             )

     def extraBits(self, index):
         """Number of extra bits of the symbol: none for a plain code.
         """
         return 0

     #Routines that use the decode interface
     def showCode(self, width=80):
         """Show all words of the code in a nice format.
         The words are printed in columns, filled top to bottom,
         using as many columns as fit in lines of width characters.
         There is always at least one column, however wide the words are.
         """
         #make table of all symbols with binary strings
         symbolStrings = [
             (self.bitPattern(s.index), self.mnemonic(s.index))
             for s in self
             ]
         #nothing to show for an empty code
         if not symbolStrings: return
         #determine column widths
         leftColWidth = max(len(b) for b,s in symbolStrings)
         rightColWidth = max(len(s) for b,s in symbolStrings)
         colwidth = leftColWidth+rightColWidth
         #a column takes colwidth+2 characters (colon and separating space);
         #the last column has no separating space, hence width+1
         columns = max(1, (width+1)//(colwidth+2))
         #round up, so that all symbols fit
         rows = -(-len(symbolStrings)//columns)
         def justify(bs):
             b,s = bs
             return b.rjust(leftColWidth)+':'+s.ljust(rightColWidth)
         for i in range(rows):
             print(' '.join(map(justify, symbolStrings[i::rows])).rstrip())

     def readTuple(self, stream):
         """Read symbol from stream and advance the stream position.
         Returns length, symbol.
         """
         length, symbol = self.decodePeek(stream.peek(self.maxLength))
         stream.pos += length
         return length, symbol

     def readTupleAndExtra(self, stream):
         """Same as readTuple, with 0 and None added for the extra bits,
         so that the result has the same shape as for WithExtra codes.
         """
         return self.readTuple(stream)+(0, None)

 class WithExtra(Code):
     """Extension for Code so that symbol may have extra bits associated.
     The number of extra bits of each symbol normally comes from an
     extraTable (indexed by symbol index), which is what extraBits uses.
     If you have no extraTable, override extraBits yourself.
     Routine readTupleAndExtra now reads the extra bits too.
     Value probably needs to be overridden; see Enumerator.
     Note: this does not give you a decodeTable.
     """
     #redefine these if you don't want to use an extraTable
     def extraBits(self, index):
         """Get the number of extra bits for this symbol.
         """
         table = self.extraTable
         return table[index]

     def mnemonic(self, index):
         """This value must be independent of extra.
         """
         return str(index)

     def readTupleAndExtra(self, stream):
         """Read symbol and extrabits from stream.
         Returns symbol length, symbol, extraBits, extra
         >>> olleke.pos = 6
         >>> MetablockLengthAlphabet().readTupleAndExtra(olleke)
         (2, Symbol(MLEN, 4), 16, 46)
         """
         peeked = stream.peek(self.maxLength)
         length, symbol = self.decodePeek(peeked)
         stream.pos += length
         #the extra bits follow the symbol immediately
         count = self.extraBits(symbol.index)
         extra = stream.read(count)
         return length, symbol, count, extra

     def explanation(self, index, extra=None):
         """Expanded version of Code.explanation supporting extra bits.
         If you don't supply extra, it is not mentioned.
         """
         extraBits = self.extraBits(index) if extra is not None else 0
         if hasattr(self, 'extraTable'):
             lo, hi = self.span(index)
             if extraBits==0:
                 #no extra bits to show: pattern and value only
                 template, value = '{0}{2}: {3}', lo
             else:
                 #show the range, and how extra leads to the value
                 template = '{0}{1} {2}: {3}-{4}; {3}+{5}={6}'
                 value = lo+extra
         else:
             #no table, so no range to show: just the value
             template = '{0}{3}'
             lo = hi = value = self.value(index, extra)
         fields = (
             self.description and self.description+': ',
             'x'*extraBits,
             self.bitPattern(index),
             lo, hi,
             extra,
             value,
             )
         return template.format(*fields)

     def callback(self, symbol, extra):
         return self.explanation(symbol.index, extra)

 class BoolCode(Code):
     """A code of a single bit, just like Code(bitLength=1),
     except that the value is a boolean.
     """
     def __init__(self, name=None, **args):
         super().__init__(name, bitLength=1, **args)

     def value(self, index, extra=None):
         #the value of the base class, turned into False or True
         number = super().value(index, extra)
         return bool(number)

 class Enumerator(WithExtra):
     """Code that is defined by the extraTable.
     extraTable is a class variable that contains
     the extraBits of the symbols, starting at symbol 0
     value0 contains the lowest value of symbol 0
     A decodeTable is not necessary, but allowed.
     Note: place for FixedCode to make sure extraBits works
     """
     def __init__(self, name=None, **args):
         #without a decodeTable to determine the length, the size of the
         #alphabet follows from the extraTable
         hasDecodeTable = 'decodeTable' in args
         if not hasDecodeTable:
             args.update(alphabetSize=len(self.extraTable))
         super().__init__(name, **args)

     def __len__(self):
         return len(self.extraTable)

     def __getitem__(self, index):
         """Faster than PrefixDecoder
         """
         if index<len(self.extraTable):
             return Symbol(self, index)
         raise ValueError("No symbol {}[{}]".format(
             self.__class__.__name__, index))

     def value(self, index, extra):
         """Lowest value of the symbol plus extra (no extra counts as 0).
         Raises ValueError if the result is beyond the span of the symbol.
         Override if you don't define value0 and extraTable
         """
         lower, upper = self.span(index)
         result = lower+(extra if extra else 0)
         if result<=upper:
             return result
         raise ValueError('value: extra out of range')

     def span(self, index):
         """Give the range of possible values in a tuple
         Useful for mnemonic and explanation
         """
         #every earlier symbol takes up 1<<extraBits values
         lower = self.value0
         for bits in self.extraTable[:index]:
             lower += 1<<bits
         size = 1<<self.extraTable[index]
         return lower, lower+size-1

 #======================Code subclasses======================================
 #Alphabets used in the metablock header----------------------------------
 #For prefix codes
 class PrefixCodeHeader(WithExtra):
     """Header of prefix codes: two bits,
     followed by two extra bits if the two bits have value 1.
     """
     def __init__(self, codename):
         super().__init__('PFX', bitLength=2)
         #this is the name of the code that it describes
         self.codename = codename

     def extraBits(self, index):
         if index==1: return 2
         return 0

     def value(self, index, extra):
         """Returns ('Simple', #codewords) or ('Complex', HSKIP)
         """
         if index!=1:
             #complex codes have no extra bits
             if extra:
                 raise ValueError('value: extra out of range')
             return 'Complex', index
         if extra>3:
             raise ValueError('value: extra out of range')
         return 'Simple', extra+1

     def explanation(self, index, extra):
         if index!=1:
             lengths = [1, 2, 3, 4, 0, 5, 17, 6]
             shown = lengths[index:index+5]
             return '{} is complex with lengths {}...'.format(
                 self.codename,
                 ','.join(str(length) for length in shown))
         plural = 's' if extra else ''
         return '{} is simple with {} code word{}'.format(
             self.codename, extra+1, plural)

 class TreeShapeAlhabet(BoolCode):
     """The bit used to indicate if four word code is "deep" or "wide"
     The value is the tuple of the four code word lengths.
     """
     name = 'SHAPE'
     def value(self, index):
         shapes = (2,2,2,2), (1,2,3,3)
         return shapes[index]

     def explanation(self, index):
         return '{}: lengths {},{},{},{}'.format(
             bool(index), *self.value(index))

 class LengthOfLengthAlphabet(Code):
     """For use in decoding complex code descriptors.
     A fixed prefix code for the values 0 to 5.
     >>> lengthOfLengthAlphabet = LengthOfLengthAlphabet('')
     >>> print(lengthOfLengthAlphabet[2])
     coded with 2 bits
     >>> len(lengthOfLengthAlphabet[0])
     2
     >>> [len(lengthOfLengthAlphabet[x]) for x in range(6)]
     [2, 4, 3, 2, 2, 4]
     >>> lengthOfLengthAlphabet.showCode()
       00:skipped             01:coded with 4 bits 0111:coded with 1 bits
       10:coded with 3 bits  011:coded with 2 bits 1111:coded with 5 bits
     """
     #bit pattern: value
     decodeTable = {
         0b00: 0,
         0b0111: 1,
         0b011: 2,
         0b10: 3,
         0b01: 4,
         0b1111: 5,
         }

     def __init__(self, name=None, **args):
         super().__init__(name, decodeTable=self.decodeTable, **args)

     def mnemonic(self, index):
         if index!=0:
             return 'coded with {} bits'.format(index)
         return 'skipped'

     def explanation(self, index, extra=None):
         #extra is accepted but not used
         return ': '.join((self.description, self.mnemonic(index)))

 class LengthAlphabet(WithExtra):
     """Length of symbols
     Used during construction of a code.
     Symbols 16 and 17 come with 2 and 3 extra bits, the others with none.
     """
     def __init__(self, name):
         super().__init__(name, alphabetSize=18)

     def extraBits(self, index):
         if index==16: return 2
         if index==17: return 3
         return 0

     def mnemonic(self, index):
         special = {0: 'unused', 16: 'rep xx', 17: 'zero xxx'}
         if index in special:
             return special[index]
         return 'len {}'.format(index)

     def explanation(self, index, extra):
         #description is a format string that gets the symbol and the extra
         symbol = self[index]
         return self.description.format(symbol, extra)

     def value(self, index, extra):
         #the caller got the length already, so extra is enough
         return extra

 #Stream header
 class WindowSizeAlphabet(Code):
     """The alphabet used for window size in the stream header.
     The symbol index is the number of window bits; the reserved
     bit pattern 0010001 has index None.
     >>> WindowSizeAlphabet()[10].explanation()
     'windowsize=(1<<10)-16=1008'
     """
     decodeTable = {
         0b0100001: 10,   0b1100001: 14,   0b0011: 18,   0b1011: 22,
         0b0110001: 11,   0b1110001: 15,   0b0101: 19,   0b1101: 23,
         0b1000001: 12,         0b0: 16,   0b0111: 20,   0b1111: 24,
         0b1010001: 13,   0b0000001: 17,   0b1001: 21,
         0b0010001: None,
         }

     name = 'WSIZE'

     def __init__(self, name=None):
         super().__init__(name, decodeTable=self.decodeTable)

     def value(self, index):
         """Window size in bytes, or None for the reserved bit pattern.
         """
         #missing value gives index None
         if index is None: return None
         return (1<<index)-16

     def explanation(self, index):
         """Show how the window size follows from the index.
         The reserved bit pattern (index None) is reported as such;
         it has no window size to compute.
         """
         if index is None:
             return 'windowsize: reserved bit pattern (no valid window size)'
         return 'windowsize=(1<<{})-16={}'.format(
             index, (1<<index)-16)

 #Metablock
 class MetablockLengthAlphabet(WithExtra):
     """Used for the meta block length;
     also indicates a block with no data
     The index is the number of hex digits (nibbles) of the length.
     >>> metablockLengthAlphabet = MetablockLengthAlphabet()
     >>> metablockLengthAlphabet[0]; str(metablockLengthAlphabet[0])
     Symbol(MLEN, 0)
     'empty'
     >>> metablockLengthAlphabet[3]
     Traceback (most recent call last):
         ...
     ValueError: No symbol MetablockLengthAlphabet[3]
     >>> print(metablockLengthAlphabet[4])
     hhhh00
     >>> metablockLengthAlphabet[4].value(0x1000)
     4097
     >>> metablockLengthAlphabet[5].value(0x1000)
     Traceback (most recent call last):
         ...
     InvalidStream: Zeros in high nibble of MLEN
     >>> metablockLengthAlphabet[5].explanation(0x12345)
     'data length: 12345h+1=74566'
     >>> metablockLengthAlphabet.showCode()
     00:hhhh00   10:hhhhhh10 01:hhhhh01  11:empty
     """
     decodeTable = {0b11:0, 0b00:4, 0b01:5, 0b10:6}

     name = 'MLEN'
     def __init__(self, name=None):
         super().__init__(name, decodeTable=self.decodeTable)

     def extraBits(self, index):
         #four bits for every nibble
         return 4*index

     def mnemonic(self, index):
         if index==0: return 'empty'
         nibbles = self.extraBits(index)//4
         return 'h'*nibbles+self.bitPattern(index)

     def value(self, index, extra):
         """The length: extra+1, or 0 for the empty block.
         Raises ValueError if extra does not fit in the extra bits,
         and InvalidStream if more than four nibbles are used
         while the highest nibble is zero.
         """
         width = self.extraBits(index)
         if extra<0 or extra>=1<<width:
             raise ValueError('value: extra out of range')
         if index==0: return 0
         if index>4 and extra>>(width-4)==0:
             raise InvalidStream('Zeros in high nibble of MLEN')
         return extra+1

     def explanation(self, index, extra):
         if index==0: return '11: empty block'
         nibbles = self.extraBits(index)//4
         return 'data length: {:0{}x}h+1={}'.format(extra, nibbles, extra+1)


 class ReservedAlphabet(BoolCode):
     """The reserved bit that must be zero.
     """
     name = 'RSVD'
     def value(self, index):
         #a zero bit has no value; anything else is an error
         if not index: return None
         raise ValueError('Reserved bit is not zero')

     def explanation(self, index):
         return 'Reserved (must be zero)'

 class FillerAlphabet(Code):
     """The bits from streamPos up to the next byte boundary (0 to 7 bits).
     """
     def __init__(self, *, streamPos):
         #number of bits needed to reach a multiple of 8
         padding = (-streamPos)%8
         super().__init__('SKIP', bitLength=padding)

     def explanation(self, index):
         count = self.length(index)
         plural = 's' if count!=1 else ''
         return '{} bit{} ignored'.format(count, plural)

 class SkipLengthAlphabet(WithExtra):
     """Used for the skip length in an empty metablock
     The index is the number of bytes that hold the length.
     >>> skipLengthAlphabet = SkipLengthAlphabet()
     >>> skipLengthAlphabet[0]; str(skipLengthAlphabet[0])
     Symbol(SKIP, 0)
     'empty'
     >>> skipLengthAlphabet[4]
     Traceback (most recent call last):
         ...
     ValueError: index out of range
     >>> print(skipLengthAlphabet[3])
     hhhhhh11
     >>> skipLengthAlphabet[2].value(0x1000)
     4097
     >>> skipLengthAlphabet[3].value(0x1000)
     Traceback (most recent call last):
         ...
     InvalidStream: Zeros in high byte of SKIPBYTES
     >>> skipLengthAlphabet[3].explanation(0x12345)
     'skip length: 12345h+1=74566'
     >>> skipLengthAlphabet.showCode()
     00:empty    01:hh01     10:hhhh10   11:hhhhhh11
     """
     def __init__(self):
         super().__init__('SKIP', bitLength=2)

     def extraBits(self, index):
         #eight bits for every byte
         return 8*index

     def mnemonic(self, index):
         if index==0: return 'empty'
         nibbles = self.extraBits(index)//4
         return 'h'*nibbles+self.bitPattern(index)

     def value(self, index, extra):
         """The skip length: extra+1, or 0 if there is nothing to skip.
         Raises ValueError if extra does not fit in the extra bits,
         and InvalidStream if more than one byte is used
         while the highest byte is zero.
         """
         width = self.extraBits(index)
         if extra<0 or extra>=1<<width:
             raise ValueError('value: extra out of range')
         if index==0: return 0
         if index>1 and extra>>(width-8)==0:
             raise InvalidStream('Zeros in high byte of SKIPBYTES')
         return extra+1

     def explanation(self, index, extra):
         if index==0: return '00: no skip'
         byteCount = self.extraBits(index)//8
         return 'skip length: {:{}x}h+1={}'.format(extra, byteCount, extra+1)


 class TypeCountAlphabet(Enumerator):
     """Used for giving block type counts and tree counts.
     description is required; it names the things that are counted.
     >>> TypeCountAlphabet(description='').showCode()
        0:0            0101:xx,0101      1011:xxxxx,1011
     0001:0001         1101:xxxxxx,1101  0111:xxx,0111
     1001:xxxx,1001    0011:x,0011       1111:xxxxxxx,1111
     """
     #bit pattern: symbol index
     decodeTable = {
         0b0: 0,
         0b0001: 1,
         0b0011: 2,
         0b0101: 3,
         0b0111: 4,
         0b1001: 5,
         0b1011: 6,
         0b1101: 7,
         0b1111: 8,
         }

     value0 = 1
     extraTable = [0, 0, 1, 2, 3, 4, 5, 6, 7]
     name = 'BT#'

     def __init__(self, name=None, *, description):
         super().__init__(
             name,
             decodeTable=self.decodeTable,
             description=description)

     def mnemonic(self, index):
         if index in (0, 1):
             return ('0', '0001')[index]
         return 'x'*self.extraBits(index)+','+self.bitPattern(index)

     def explanation(self, index, extra):
         value = self.value(index, extra)
         #a count of one drops the last character of the description
         noun = self.description[:-1] if value==1 else self.description
         return '{}: {} {}'.format(self.mnemonic(index), value, noun)

 class BlockTypeAlphabet(Code):
     """The block types; this code works for all three kinds.
     Symbols 0 and 1 mean "previous" and "increment";
     symbol n+2 selects block type n.
     >>> b = BlockTypeAlphabet('T', NBLTYPES=5)
     >>> print(*(x for x in b))
     prev +1 #0 #1 #2 #3 #4
     """
     def __init__(self, name, NBLTYPES, **args):
         #two symbols more than there are block types
         super().__init__(name, alphabetSize=NBLTYPES+2, **args)
         self.NBLTYPES = NBLTYPES

     def mnemonic(self, index):
         if index==0: return 'prev'
         if index==1: return '+1'
         return '#'+str(index-2)

     def value(self, index):
         return index-2

     def explanation(self, index):
         fixed = {0: '0: previous', 1: '1: increment'}
         if index in fixed:
             return fixed[index]
         return 'Set block type to: '+str(index-2)

 class BlockCountAlphabet(Enumerator):
     """Block counts
     >>> b = BlockCountAlphabet('L')
     >>> print(b[25])
     [24*x]: BC16625-16793840
     """

     value0 = 1
     extraTable = [2,2,2,2,3, 3,3,3,4,4, 4,4,5,5,5, 5,6,6,7,8, 9,10,11,12,13, 24]
     def __init__(self, name, **args):
         super().__init__(name, alphabetSize=26, **args)

     def mnemonic(self, index):
         extraBits = self.extraBits(index)
         #the first five symbols show their x's in full, the rest as a count
         if index<5:
             marks = 'x'*extraBits
         else:
             marks = '[{}*x]'.format(extraBits)
         return '{}: BC{}-{}'.format(marks, *self.span(index))

     def explanation(self, index, extra):
         detail = super().explanation(index, extra)
         return 'Block count: '+detail

 class DistanceParamAlphabet(WithExtra):
     """The distance parameters NPOSTFIX and NDIRECT.
     The description treats them as two separate fields;
     reading them as one symbol with four extra bits is easier.
     """
     def __init__(self):
         super().__init__('DIST', bitLength=2)

     def extraBits(self, index):
         #always four, whatever the symbol
         return 4

     def value(self, index, extra):
         """Returns NPOSTFIX and NDIRECT<<NPOSTFIX
         """
         if extra<=15:
             return index, extra<<index
         raise ValueError('value: extra out of range')

     def explanation(self, index, extra):
         direct = extra<<index
         return '{} postfix bits and {:04b}<<{}={} direct codes'.format(
             index, extra, index, direct)

     def mnemonic(self, index):
         return 'PF'+str(index)

 class LiteralContextMode(Code):
     """Two bit code for the literal context mode of one block type.
     >>> LiteralContextMode().showCode()
     00:LSB6   01:MSB6   10:UTF8   11:Signed
     >>> LiteralContextMode().explanation(2)
     'Context mode for type 9: 2(UTF8)'
     """

     def __init__(self, *, number=9):
         #number is the block type this mode belongs to; it ends up in the name
         super().__init__('LC{}'.format(number), bitLength=2)
         self.number = number

     def mnemonic(self, index):
         """Name of the context mode with the given number.
         """
         modeNames = ('LSB6', 'MSB6', 'UTF8', 'Signed')
         return modeNames[index]

     def explanation(self, index):
         """Tell which block type gets which context mode.
         """
         modeName = self.mnemonic(index)
         return 'Context mode for type {}: {}({})'.format(
             self.number, index, modeName)

 class RLEmaxAlphabet(Enumerator):
     """Code for RLEMAX, the parameter of the run length encoding
     that is used in context maps.
     >>> RLEmaxAlphabet().showCode()
     0:1    1:more
     """
     value0 = 0
     extraTable = [0, 4]
     name = 'RLE#'

     def mnemonic(self, index):
         """Symbol 0 is shown as '1', symbol 1 as 'more'.
         """
         labels = ('1', 'more')
         return labels[index]

     def explanation(self, index, extra):
         """Explain the symbol, preceded by the description if there is one.
         """
         #a non-empty description gets a colon; an empty one stays as it is
         prefix = self.description
         if prefix:
             prefix = prefix+': '
         if index!=0:
             return '{}xxxx 1: RLEMAX={}'.format(prefix, extra+1)
         return prefix+'No RLE coding'

 class TreeAlphabet(WithExtra):
     """The alphabet to enumerate entries (called trees) in the context map.
     Parameters are RLEMAX and NTREES.
     Symbol 0 is map #0, symbols 1..RLEMAX are runs of zeroes whose
     length needs extra bits, the remaining symbols are the other maps.
     >>> t = TreeAlphabet('', RLEMAX=3, NTREES=5)
     >>> len(t)
     8
     >>> print(t[2])
     xx+4 zeroes
     >>> t[3].explanation(2)
     '8+010=10 zeroes'
     >>> t[0].value(0)
     (1, 0)
     """
     name = 'CMI'
     def __init__(self, name=None, *, RLEMAX, NTREES, **args):
         super().__init__(name, alphabetSize=RLEMAX+NTREES, **args)
         self.RLEMAX = RLEMAX
         self.NTREES = NTREES

     def extraBits(self, index):
         """A run length symbol has as many extra bits as its index;
         all other symbols have none.
         """
         return index if 0<index<=self.RLEMAX else 0

     def mnemonic(self, index):
         """Label: a map number, or the pattern of a run of zeroes.
         """
         if index==0:
             return 'map #0'
         if index>self.RLEMAX:
             return 'map #{}'.format(index-self.RLEMAX)
         return '{}+{} zeroes'.format('x'*index, 1<<index)

     def value(self, index, extra):
         """Give count and value."""
         if index==0:
             return 1, 0
         if index>self.RLEMAX:
             #a single entry for a map other than #0
             return 1, index-self.RLEMAX
         #run of zeroes: base length plus extra bits
         return (1<<index)+extra, 0

     def explanation(self, index, extra):
         """Explain symbol plus extra bits; the description, if any,
         precedes the map numbers.
         """
         prefix = self.description
         if prefix:
             prefix = prefix+': '
         if index==0:
             return prefix+'map #0'
         if index>self.RLEMAX:
             return '{}map #{}-{}={}'.format(
                 prefix,
                 index, self.RLEMAX, index-self.RLEMAX)
         runBase = 1<<index
         return '{}+{:0{}b}={} zeroes'.format(
             runBase,
             extra, self.extraBits(index),
             runBase+extra)

 #Prefix alphabets for the data stream----------------------------------
 class LiteralAlphabet(Code):
     """Alphabet of the 256 literal byte values.
     """
     minLength = maxLength = 8
     def __init__(self, number):
         #number tells the literal prefix codes apart; it becomes part of the name
         super().__init__('L{}'.format(number), alphabetSize=256)

     def mnemonic(self, index):
         """Readable form of the byte.
         """
         return outputCharFormatter(index)

     def value(self, index, extra=None):
         """A literal symbol stands for itself; extra is ignored.
         """
         return index

     def explanation(self, index, extra=None):
         """The explanation is just the mnemonic; extra is ignored.
         """
         return self.mnemonic(index)

 class InsertLengthAlphabet(Enumerator):
     """Internal code for insert lengths.
     InsertAndCopyAlphabet keeps an instance of this class and uses it
     for the insert half of its symbols.
     """
     #presumably the lowest value of symbol 0 (interpreted by Enumerator)
     value0 = 0
     #number of extra bits for each of the 24 insert length codes
     extraTable = [0,0,0,0,0, 0,1,1,2,2, 3,3,4,4,5, 5,6,7,8,9, 10,12,14,24]

 class CopyLengthAlphabet(Enumerator):
     """Internal code for copy lengths.
     InsertAndCopyAlphabet keeps an instance of this class and uses it
     for the copy half of its symbols.
     """
     #presumably the lowest value of symbol 0 (interpreted by Enumerator)
     value0 = 2
     #number of extra bits for each of the 24 copy length codes
     extraTable = [0,0,0,0,0, 0,0,0,1,1, 2,2,3,3,4, 4,5,5,6,7, 8,9,10,24]

 class InsertAndCopyAlphabet(WithExtra):
     """The insert and copy code: one symbol that combines an insert
     length code, a copy length code and a "distance is the same" flag.
     >>> for x in range(0,704,704//13):
     ...    print('{:10b}'.format(x), InsertAndCopyAlphabet()[x])
              0 I0C2&D=0
         110110 I6+xC8&D=0
        1101100 I5C22+xxx&D=0
       10100010 I4C4
       11011000 I3C10+x
      100001110 I14+xxC8
      101000100 I10+xxC22+xxx
      101111010 I98+xxxxxC14+xx
      110110000 I6+xC70+xxxxx
      111100110 I1090+[10*x]C8
     1000011100 I26+xxxC326+[8*x]
     1001010010 I322+[8*x]C14+xx
     1010001000 I194+[7*x]C70+xxxxx
     1010111110 I22594+[24*x]C1094+[10*x]
     """
     insertLengthAlphabet = InsertLengthAlphabet(None)
     copyLengthAlphabet = CopyLengthAlphabet(None)

     def __init__(self, number=''):
         super().__init__('IC{}'.format(number), bitLength=10)

     def __len__(self):
         """Only 704 of the 1024 ten bit values are symbols.
         """
         return 704

     def extraBits(self, index):
         """Extra bits of the insert part plus those of the copy part.
         """
         insertSymbol, copySymbol, _ = self.splitSymbol(index)
         insertExtra = InsertLengthAlphabet.extraTable[insertSymbol.index]
         copyExtra = CopyLengthAlphabet.extraTable[copySymbol.index]
         return insertExtra+copyExtra

     def splitSymbol(self, index):
         """Give relevant values for computations:
         (insertSymbol, copySymbol, dist0flag)
         """
         #each group of 64 symbols is one cell (row, column) of the table;
         #the column selects the upper bits of the copy code,
         #the row those of the insert code
         row, col = (
             (0,0), (0,1), (1,0), (1,1), (2,0), (2,1),
             (1,2), (3,0), (2,2), (3,1), (3,2),
             )[index>>6]
         #lower three bits of both sub codes come from the symbol itself
         insertLengthCode = (row<<3) | ((index>>3)&7)
         #rows 0 and 1 both hold insert codes 0..7: row 0 is the
         #"same distance" variant of row 1
         if row:
             insertLengthCode -= 8
         copyLengthCode = (col<<3) | (index&7)
         insertSymbol = Symbol(self.insertLengthAlphabet, insertLengthCode)
         copySymbol = Symbol(self.copyLengthAlphabet, copyLengthCode)
         return insertSymbol, copySymbol, row==0

     def mnemonic(self, index):
         """Make a nice mnemonic
         """
         def bitPattern(count):
             #up to five extra bits are spelled out, more are abbreviated
             if count<6: return 'x'*count
             return '[{}*x]'.format(count)
         insertSymbol, copySymbol, sameDistance = self.splitSymbol(index)
         insertLow, _ = insertSymbol.code.span(insertSymbol.index)
         insertExtra = insertSymbol.extraBits()
         copyLow, _ = copySymbol.code.span(copySymbol.index)
         copyExtra = copySymbol.extraBits()
         return 'I{}{}{}C{}{}{}{}'.format(
             insertLow,
             '+' if insertExtra else '',
             bitPattern(insertExtra),
             copyLow,
             '+' if copyExtra else '',
             bitPattern(copyExtra),
             '&D=0' if sameDistance else '')

     def value(self, index, extra):
         """Returns insert length, copy length and the same distance flag.
         """
         insertSymbol, copySymbol, sameDistance = self.splitSymbol(index)
         insertWidth = insertSymbol.extraBits()
         #the insert extra bits are the low bits, the copy extra bits the rest
         insertExtra = extra & ((1<<insertWidth)-1)
         copyExtra = extra>>insertWidth
         insertLength = insertSymbol.value(insertExtra)
         copyLength = copySymbol.value(copyExtra)
         return insertLength, copyLength, sameDistance

     def explanation(self, index, extra):
         """Describe the decoded insert and copy lengths.
         """
         insertLength, copyLength, sameDistance = self.value(index, extra)
         if not sameDistance:
             return 'Literal: {}, copy: {}'.format(insertLength, copyLength)
         return 'Literal: {}, copy: {}, same distance'.format(insertLength, copyLength)

 class DistanceAlphabet(WithExtra):
     """Represent the distance encoding.
     Dynamically generated alphabet.
     This is what the documentation should have said:
     Ignoring offsets for the moment, the "long" encoding works as follows:
     Write the distance in binary as follows:
     1xy..yz..z, then the distance symbol consists of n..nxz..z
     Where:
     n is one less than number of bits in y
     x is a single bit
     y..y are n+1 extra bits (encoded in the bit stream)
     z..z is NPOSTFIX bits that are part of the symbol
     The offsets are so as to start at the lowest useable value:
     if 1xyyyyz = distance +(4<<POSTFIX)-NDIRECT-1
     then n..nxz..z is symbol -NDIRECT-16
     >>> d = DistanceAlphabet('D', NPOSTFIX=2, NDIRECT=10)
     >>> print(d[4], d[17], d[34])
     last-1 1 10xx00-5
     >>> [str(d[x]) for x in range(26, 32)]
     ['10x00-5', '10x01-5', '10x10-5', '10x11-5', '11x00-5', '11x01-5']
     """
     def __init__(self, number, *, NPOSTFIX, NDIRECT):
         self.NPOSTFIX = NPOSTFIX
         self.NDIRECT = NDIRECT
         #the alphabet size follows from the two parameters;
         #actually, not all symbols are used,
         #only NDIRECT+16+(44-2*POSTFIX<<NPOSTFIX)
         size = self.NDIRECT+16+(48<<self.NPOSTFIX)
         super().__init__('D{}'.format(number), alphabetSize=size)

     def extraBits(self, index):
         """Indicate how many extra bits are needed to interpret symbol
         >>> d = DistanceAlphabet('D', NPOSTFIX=2, NDIRECT=10)
         >>> [d[i].extraBits() for i in range(26)]
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
         >>> [d[i].extraBits() for i in range(26,36)]
         [1, 1, 1, 1, 1, 1, 1, 1, 2, 2]
         """
         longCode = index-self.NDIRECT-16
         #the short and the direct codes have no extra bits
         if longCode<0:
             return 0
         return 1+(longCode>>(self.NPOSTFIX+1))

     def value(self, dcode, dextra):
         """Decode value of symbol together with the extra bits.
         The result is a pair: for the first 16 symbols a reference to
         an earlier distance with an offset, else 0 and the distance.
         >>> d = DistanceAlphabet('D', NPOSTFIX=2, NDIRECT=10)
         >>> d[34].value(2)
         (0, 35)
         """
         if dcode<16:
             recentDistances = (
                 (1,0), (2,0), (3,0), (4,0),
                 (1,-1), (1,+1), (1,-2), (1,+2), (1,-3), (1,+3),
                 (2,-1), (2,+1), (2,-2), (2,+2), (2,-3), (2,+3),
                 )
             return recentDistances[dcode]
         if dcode<16+self.NDIRECT:
             return (0, dcode-16)
         #we use the original formulas, instead of my clear explanation
         longCode = dcode-self.NDIRECT-16
         postfixMask = (1<<self.NPOSTFIX)-1
         ndistbits = 1+(longCode>>(self.NPOSTFIX+1))
         hcode = longCode>>self.NPOSTFIX
         lcode = longCode&postfixMask
         offset = ((2+(hcode&1))<<ndistbits)-4
         distance = ((offset+dextra)<<self.NPOSTFIX)+lcode+self.NDIRECT+1
         return (0, distance)

     def mnemonic(self, index, verbose=False):
         """Give mnemonic representation of meaning.
         With verbose, the x's for the extra bits are always spelled out;
         otherwise long strings of x's are abbreviated.
         """
         if index<16:
             recentNames = (
                 'last', '2last', '3last', '4last',
                 'last-1', 'last+1', 'last-2', 'last+2', 'last-3', 'last+3',
                 '2last-1', '2last+1', '2last-2', '2last+2', '2last-3', '2last+3',
                 )
             return recentNames[index]
         if index<16+self.NDIRECT:
             return str(index-16)
         #construct strings like "1xx01-15"
         longCode = index-(self.NDIRECT+16)
         hcode = longCode>>self.NPOSTFIX
         lcode = longCode&((1<<self.NPOSTFIX)-1)
         nbits = (2+hcode)>>1
         if hcode<13 or verbose:
             bitPattern = 'x'*nbits
         else:
             bitPattern = '[{}*x]'.format(nbits)
         #without postfix bits there is no lcode to show
         if self.NPOSTFIX:
             formatString = '1{0}{1}{2:0{3}b}{4:+d}'
         else:
             formatString = '1{0}{1}{4:+d}'
         return formatString.format(
             hcode&1,
             bitPattern,
             lcode, self.NPOSTFIX,
             self.NDIRECT+1-(4<<self.NPOSTFIX))

     def explanation(self, index, extra):
         """Show the mnemonic with the actual extra bits filled in,
         followed by the decoded value.
         >>> d = DistanceAlphabet('D', NPOSTFIX=2, NDIRECT=10)
         >>> d[55].explanation(13)
         '11[1101]01-5: [0]+240'
         """
         nbits = self.extraBits(index)
         extraString = '[{:0{}b}]'.format(extra, nbits)
         #replace the run of x's in the verbose mnemonic by the real bits
         pattern = self.mnemonic(index, True)
         filledIn = pattern.replace('x'*(nbits or 1), extraString)
         return '{0}: [{1[0]}]{1[1]:+d}'.format(
             filledIn,
             self.value(index, extra))

 #Classes for doing actual work------------------------------------------
 class ContextModeKeeper:
     """Computes the literal context from the last two output bytes.
     Feed it every output character with add(); getIndex() then gives
     the index in the context map for the current context mode.
     """
     def __init__(self, mode):
         #history of the last two characters, oldest first; starts as zeroes
         self.chars = deque([0,0], maxlen=2)
         self.mode = mode

     def setContextMode(self, mode):
         """Switch to given context mode (0..3)"""
         self.mode = mode

     def getIndex(self):
         """Context index for the current mode;
         None if the mode is not one of 0..3.
         """
         mode = self.mode
         older, newest = self.chars
         if mode==0:
             #LSB6: lower six bits of the last byte
             return newest&0x3f
         if mode==1:
             #MSB6: upper six bits of the last byte
             return newest>>2
         if mode==2:
             #UTF8: character class of previous and a bit of the second
             return self.lut0[newest]|self.lut1[older]
         if mode==3:
             #Signed: initial bits of last two bytes
             return self.lut2[newest]<<3|self.lut2[older]
         return None

     def add(self, index):
         """Adjust the context for output char (as int)."""
         self.chars.append(index)

     #0: control     #16: quote  #32: ,:;  #48: AEIOU
     #4: tab/lf/cr   #20: %      #36: .    #52: BC..Z
     #8: space       #24: (<[{   #40: =    #56: aeiou
     #12:!#$&*+-/?@| #28: )>]}   #44: 0-9  #60: bc..z
     lut0 = [0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
             0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
             8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
            44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
            12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
            52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
            12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
            60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0
            ]+[0,1]*32+[2,3]*32
     #0: space  1:punctuation  2:digit/upper 3:lower
     lut1 = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
              0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
              2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
              1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
              2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
              1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
              3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0
            ]+[0]*96+[2]*32
     #initial bits: 8*0, 4*0, 2*0, 1*0, 1*1, 2*1, 4*1, 8*1
     lut2 = [0]+[1]*15+[2]*48+[3]*64+[4]*64+[5]*48+[6]*15+[7]
     assert len(lut0)==len(lut1)==len(lut2)==256

 class WordList:
     """Word list: the static dictionary together with the word transforms.
     The dictionary is read from a file called 'dict' in the current
     directory when the object is created.
     >>> WordList().word(7, 35555)
     b'Program to '
     """
     #number of index bits for every word length; lengths below 4 have no words
     NDBITS = [0,  0,  0,  0, 10, 10, 11, 11, 10, 10,
              10, 10, 10,  9,  9,  8,  7,  7,  8,  7,
               7,  6,  6,  5,  5]
     def __init__(self):
         #read the dictionary once and close the file right away, so that
         #no file handle stays open for the lifetime of the object;
         #self.file keeps the seek/read interface of the file
         import io
         with open('dict', 'rb') as dictFile:
             self.file = io.BytesIO(dictFile.read())
         self.compileActions()

     def word(self, size, dist):
         """Get word of the given size (in bytes) for a dictionary distance.
         The lower bits of dist select the word, the rest the action
         (transformation) that is applied to it.
         """
         #split dist in index and action
         ndbits = self.NDBITS[size]
         index = dist & ((1<<ndbits)-1)
         action = dist>>ndbits
         #compute position in file: skip all shorter words first
         position = sum(n<<self.NDBITS[n] for n in range(4,size))+size*index
         self.file.seek(position)
         return self.doAction(self.file.read(size), action)

     @staticmethod
     def _upperCaseAt(buffer, i):
         """Uppercase the character that starts at buffer[i], in place,
         the way the Brotli format defines it (RFC 7932, section 8).
         buffer is a bytearray; returns the number of bytes of the character.
         This is a byte level operation, not a Unicode uppercase.
         """
         lead = buffer[i]
         if lead<0xc0:
             #one byte: only a..z changes
             if 97<=lead<=122: buffer[i] ^= 32
             return 1
         if lead<0xe0:
             #two byte sequence: flip bit 5 of the second byte
             if i+1<len(buffer): buffer[i+1] ^= 32
             return 2
         #three byte sequence: xor the third byte with 5
         if i+2<len(buffer): buffer[i+2] ^= 5
         return 3

     def upperCase1(self, word):
         """UppercaseFirst transform: uppercase the first character of word.
         word is a bytes object; the result is a bytes object of equal length.
         An empty word is returned unchanged.
         """
         buffer = bytearray(word)
         if buffer:
             self._upperCaseAt(buffer, 0)
         return bytes(buffer)

     def upperCaseAll(self, word):
         """UppercaseAll transform: uppercase every character of word.
         word is a bytes object; the result is a bytes object of equal length.
         """
         buffer = bytearray(word)
         i = 0
         while i<len(buffer):
             i += self._upperCaseAt(buffer, i)
         return bytes(buffer)


     #Super compact form of action table.
     #_ means space, .U means UpperCaseAll, U(w) means UpperCaseFirst
     actionTable = r"""
         0:w        25:w+_for_     50:w+\n\t       75:w+. This_100:w+ize_
         1:w+_      26:w[3:]       51:w+:          76:w+,      101:w.U+.
         2:_+w+_    27:w[:-2]      52:_+w+._       77:.+w+_    102:\xc2\xa0+w
         3:w[1:]    28:w+_a_       53:w+ed_        78:U(w)+(   103:_+w+,
         4:U(w)+_   29:w+_that_    54:w[9:]        79:U(w)+.   104:U(w)+="
         5:w+_the_  30:_+U(w)      55:w[7:]        80:w+_not_  105:w.U+="
         6:_+w      31:w+._        56:w[:-6]       81:_+w+="   106:w+ous_
         7:s_+w+_   32:.+w         57:w+(          82:w+er_    107:w.U+,_
         8:w+_of_   33:_+w+,_      58:U(w)+,_      83:_+w.U+_  108:U(w)+=\'
         9:U(w)     34:w[4:]       59:w[:-8]       84:w+al_    109:_+U(w)+,
        10:w+_and_  35:w+_with_    60:w+_at_       85:_+w.U    110:_+w.U+="
        11:w[2:]    36:w+\'        61:w+ly_        86:w+=\'    111:_+w.U+,_
        12:w[:-1]   37:w+_from_    62:_the_+w+_of_ 87:w.U+"    112:_+w.U+,
        13:,_+w+_   38:w+_by_      63:w[:-5]       88:U(w)+._  113:w.U+(
        14:w+,_     39:w[5:]       64:w[:-9]       89:_+w+(    114:w.U+._
        15:_+U(w)+_ 40:w[6:]       65:_+U(w)+,_    90:w+ful_   115:_+w.U+.
        16:w+_in_   41:_the_+w     66:U(w)+"       91:_+U(w)+._116:w.U+=\'
        17:w+_to_   42:w[:-4]      67:.+w+(        92:w+ive_   117:_+w.U+._
        18:e_+w+_   43:w+. The_    68:w.U+_        93:w+less_  118:_+U(w)+="
        19:w+"      44:w.U         69:U(w)+">      94:w.U+\'   119:_+w.U+=\'
        20:w+.      45:w+_on_      70:w+="         95:w+est_   120:_+U(w)+=\'
        21:w+">     46:w+_as_      71:_+w+.        96:_+U(w)+.
        22:w+\n     47:w+_is_      72:.com/+w      97:w.U+">
        23:w[:-3]   48:w[:-7]                      98:_+w+=\'
        24:w+]      49:w[:-1]+ing_ 74:U(w)+\'      99:U(w)+,
         """

     def compileActions(self):
         """Build the action table from the text above.
         Fills self.actionList with 121 strings; each is a Python
         expression in w (the word), U (UppercaseFirst) and
         A (UppercaseAll) that doAction evaluates.
         """
         import re
         self.actionList = actions = [None]*121
         #Action 73, which is too long, looks like this when expanded:
         actions[73] = "b' the '+w+b' of the '"
         #find out what the columns are: the first table line has a colon
         #in every column, and every index field is three wide
         actionLines = self.actionTable.splitlines()
         colonPositions = [m.start()
             for m in re.finditer(':',actionLines[1])
             ]+[100]
         columns = [(start-3, end-3)
             for start, end in zip(colonPositions, colonPositions[1:])]
         for line in actionLines:
             for start,end in columns:
                 action = line[start:end]
                 #skip empty actions
                 if not action or action.isspace(): continue
                 #chop it up, and check if the colon is properly placed
                 index, colon, action = action[:3], action[3], action[4:]
                 assert colon==':'
                 #remove filler spaces at right, and replace space symbols
                 action = action.rstrip().replace('_', ' ')
                 #every action must use the word
                 if 'w' not in action:
                     raise ValueError('action {} has no w'.format(index))
                 #add quotes around left string when present
                 #translation: any pattern from beginning, up to
                 #(but not including) a + following by a w later on
                 action = re.sub(r"^(.*)(?=\+[U(]*w)", r"b'\1'", action)
                 #add quotes around right string when present
                 #translation: anything with a w in it, followed by a +
                 #and a pattern up to the end
                 #(there is no variable lookbehind assertion,
                 #so we have to copy the pattern)
                 #the [ in the character set is escaped to avoid the
                 #"possible nested set" warning of the re module
                 action = re.sub(r"(w[\[:\-1\]).U]*)\+(.*)$", r"\1+b'\2'", action)
                 #expand shortcut for uppercaseAll
                 action = action.replace("w.U", "A(w)")
                 #store action
                 actions[int(index)] = action

     def doAction(self, w, action):
         """Perform the proper action: apply transformation number action
         to the word w (bytes) and return the resulting bytes.
         """
         #the expressions come from the table in this class, never from
         #the stream, so evaluating them is safe
         environment = {
             'w': w,
             'U': self.upperCase1,
             'A': self.upperCaseAll,
             }
         return eval(self.actionList[action], environment)

 class Layout:
     """Class to layout the output.
     """
     #display width of hexdata+bitdata
     width = 25
     #general
     def __init__(self, stream):
         self.stream = stream
         self.bitPtr = self.width

     def makeHexData(self, pos):
         """Produce hex dump of all data containing the bits
         from pos to stream.pos
         """
         firstAddress = pos+7>>3
         lastAddress = self.stream.pos+7>>3
         return ''.join(map('{:02x} '.format,
             self.stream.data[firstAddress:lastAddress]))

     def formatBitData(self, pos, width1, width2=0):
         """Show formatted bit data:
         Bytes are separated by commas
         whole bytes are displayed in hex
         >>> Layout(olleke).formatBitData(6, 2, 16)
         '|00h|2Eh,|00'
         >>> Layout(olleke).formatBitData(4, 1, 0)
         '1'
         """
         result = []
         #make empty prefix code explicit
         if width1==0: result = ['()', ',']
         for width in width1, width2:
             #skip empty width2
             if width==0: continue
             #build result backwards in a list
             while width>0:
                 availableBits = 8-(pos&7)
                 if width<availableBits:
                     #read partial byte, beginning nor ending at boundary
                     data = self.stream.data[pos>>3] >> (pos&7) & (1<<width)-1
                     result.append('{:0{}b}'.format(data, width))
                 elif availableBits<8:
                     #read rest of byte, ending at boundary
                     data = self.stream.data[pos>>3] >> (pos&7)
                     result.append('|{:0{}b}'.format(data, availableBits))
                 else:
                     #read whole byte (in hex), beginning and ending at boundary
                     data = self.stream.data[pos>>3]
                     result.append('|{:02X}h'.format(data))
                 width -= availableBits
                 pos += availableBits
             #if width overshot from the availableBits subtraction, fix it
             pos += width
             #add comma to separate fields
             result.append(',')
         #concatenate pieces, reversed, skipping the last space
         return ''.join(result[-2::-1])

     def readPrefixCode(self, alphabet):
         """Read a prefix code from the stream and give it to alphabet.
         Handles both kinds of prefix code; returns the alphabet.
         The alphabet in question must have a "logical" order,
         otherwise the assignment of symbols doesn't work.
         """
         mode, numberOfSymbols = self.verboseRead(PrefixCodeHeader(alphabet.name))
         if mode=='Complex':
             #for a complex code, numberOfSymbols means hskip
             self.readComplexCode(numberOfSymbols, alphabet)
             return alphabet
         #The other kind of code: a list of 1 to 4 symbols.
         #Table of lengths for the mnemonic function; with four symbols
         #the lengths are not known until the tree shape has been read
         lengths = ([0], [1,1], [1,2,2], '????')[numberOfSymbols-1]
         #temporarily adjust mnemonic function of alphabet so that it shows
         #the length too; symbolNumber is the loop variable below,
         #looked up at the moment the function is called
         def myMnemonic(index):
             bits = lengths[symbolNumber]
             plural = '' if bits==1 else 's'
             return '{} bit{}: {}'.format(
                 bits,
                 plural,
                 alphabet.__class__.mnemonic(alphabet, index)
                 )
         alphabet.mnemonic = myMnemonic
         table = []
         for symbolNumber in range(numberOfSymbols):
             symbol = self.verboseRead(alphabet, skipExtra=True)
             table.append(symbol.index)
         #restore mnemonic
         del alphabet.mnemonic
         if numberOfSymbols==4:
             #read tree shape to redefine lengths
             lengths = self.verboseRead(TreeShapeAlhabet())
         #construct the alphabet prefix code
         alphabet.setLength(dict(zip(table, lengths)))
         return alphabet

     def readComplexCode(self, hskip, alphabet):
         """Read a complex prefix code from the stream into alphabet.
         This takes two steps: first the code lengths of the length code
         are read (the first hskip of them are skipped), then the length
         code is used to read the code length of every symbol of alphabet.
         Both codes are shown with showCode once they are complete.
         Raises ValueError if the length code is oversubscribed.
         """
         stream = self.stream
         #read the lengths for the length code
         #these are the symbols of the length code, in the order in which
         #their lengths appear in the stream; the first hskip are left out
         lengths = [1,2,3,4,0,5,17,6,16,7,8,9,10,11,12,13,14,15][hskip:]
         codeLengths = {}
         #total is the amount of code space in use, 32 being a full code
         total = 0
         lol = LengthOfLengthAlphabet('##'+alphabet.name)
         #lengthCode will be used for coding the lengths of the new code
         #we use it for display until now; definition comes below
         lengthCode = LengthAlphabet('#'+alphabet.name)
         lengthIter = iter(lengths)
         lengthsLeft = len(lengths)
         #stop as soon as the code is full, or all lengths have been read
         while total<32 and lengthsLeft>0:
             lengthsLeft -= 1
             newSymbol = next(lengthIter)
             lol.description = str(lengthCode[newSymbol])
             length = self.verboseRead(lol)
             #length 0 means that the symbol is not used
             if length:
                 codeLengths[newSymbol] = length
                 total += 32>>length
         if total>32: raise ValueError("Stream format")
         #a single symbol in the length code gets length 0
         if len(codeLengths)==1: codeLengths[list(codeLengths.keys())[0]] = 0
         #Now set the encoding of the lengthCode
         lengthCode.setLength(codeLengths)
         print("***** Lengths for {} will be coded as:".format(alphabet.name))
         lengthCode.showCode()
         #Now determine the symbol lengths with the lengthCode
         symbolLengths = {}
         #total is again the code space in use, now with 32768 for a full code
         total = 0
         #length that is repeated by a 16; changes with every nonzero length
         lastLength = 8
         #the symbols of the alphabet get their lengths in this order
         alphabetIter = iter(alphabet)
         while total<32768:
             #look ahead to see what is going to happen
             length = lengthCode.decodePeek(
                 self.stream.peek(lengthCode.maxLength))[1].index
             #in every branch, set lengthCode.description to explanatory text
             #lengthCode calls format(symbol, extra) with this string
             if length==0:
                 symbol = next(alphabetIter)
                 lengthCode.description = 'symbol {} unused'.format(symbol)
                 self.verboseRead(lengthCode)
                 #unused symbol
                 continue
             if length==16:
                 lengthCode.description = \
                     '{1}+3 symbols of length '+str(lastLength)
                 extra = self.verboseRead(lengthCode)
                 #scan series of 16s (repeat counts)
                 #start with repeat count 2
                 repeat = 2
                 startSymbol = next(alphabetIter)
                 endSymbol = next(alphabetIter)
                 symbolLengths[startSymbol.index] = \
                     symbolLengths[endSymbol.index] = lastLength
                 #count the two just defined symbols
                 total += 2*32768>>lastLength
                 #note: loop may end because we're there
                 #even if a 16 _appears_ to follow
                 while True:
                     #determine last symbol
                     oldRepeat = repeat
                     #the first 16 makes this extra+3; every following 16
                     #multiplies the part above 2 by four before adding
                     repeat = (repeat-2<<2)+extra+3
                     #read as many symbols as repeat increased
                     for i in range(oldRepeat, repeat):
                         endSymbol = next(alphabetIter)
                         symbolLengths[endSymbol.index] = lastLength
                     #compute new total; it may be end of loop
                     total += (repeat-oldRepeat)*32768>>lastLength
                     if total>=32768: break
                     #see if there is more to do
                     length = lengthCode.decodePeek(
                         self.stream.peek(lengthCode.maxLength))[1].index
                     if length!=16: break
                     lengthCode.description = 'total {}+{{1}} symbols'.format(
                         (repeat-2<<2)+3)
                     extra = self.verboseRead(lengthCode)
             elif length==17:
                 #read, and show explanation
                 lengthCode.description = '{1}+3 unused'
                 extra = self.verboseRead(lengthCode)
                 #scan series of 17s (groups of zero counts)
                 #start with repeat count 2
                 repeat = 2
                 #skip the first two symbols; they get no length
                 startSymbol = next(alphabetIter)
                 endSymbol = next(alphabetIter)
                 #note: loop will not end with total==32768,
                 #since total doesn't change here
                 while True:
                     #determine last symbol
                     oldRepeat = repeat
                     #same as for 16, but with a factor of eight
                     repeat = (repeat-2<<3)+extra+3
                     #read as many symbols as repeat increases
                     for i in range(repeat-oldRepeat):
                         endSymbol = next(alphabetIter)
                     #see if there is more to do
                     length = lengthCode.decodePeek(
                         self.stream.peek(lengthCode.maxLength))[1].index
                     if length!=17: break
                     lengthCode.description = 'total {}+{{1}} unused'.format(
                         (repeat-2<<3)+3)
                     extra = self.verboseRead(lengthCode)
             else:
                 #an ordinary length for the next symbol
                 symbol = next(alphabetIter)
                 #double braces for format
                 char = str(symbol)
                 if char in '{}': char *= 2
                 lengthCode.description = \
                     'Length for {} is {{0.index}} bits'.format(char)
                 #output is not needed (will be 0)
                 self.verboseRead(lengthCode)
                 symbolLengths[symbol.index] = length
                 total += 32768>>length
                 lastLength = length
         #the lengths must describe exactly one full code
         assert total==32768
         alphabet.setLength(symbolLengths)
         print('End of table. Prefix code '+alphabet.name+':')
         alphabet.showCode()

     #stream
     def processStream(self):
         """Process a brotli stream: read the stream header and then
         all meta-blocks up to and including the last one,
         printing an explanation of everything that is read.
         """
         def banner(title, fill='-'):
             #section heading, centered in a line of fill characters
             print(title.center(60, fill))
         print('addr  hex{:{}s}binary context explanation'.format(
             '', self.width-10))
         banner('Stream header')
         self.windowSize = self.verboseRead(WindowSizeAlphabet())
         banner('Metablock header', '=')
         self.ISLAST = False
         self.output = bytearray()
         while not self.ISLAST:
             self.ISLAST = self.verboseRead(
                 BoolCode('LAST', description="Last block"))
             #only a last block can be flagged as empty; that ends the stream
             if self.ISLAST and self.verboseRead(
                     BoolCode('EMPTY', description="Empty block")):
                 break
             #a block without data has been skipped already
             if self.metablockLength(): continue
             #only a block that is not the last has the uncompressed flag
             if not self.ISLAST and self.uncompressed(): continue
             banner('Block type descriptors')
             self.numberOfBlockTypes = {}
             self.currentBlockCounts = {}
             self.blockTypeCodes = {}
             self.blockCountCodes = {}
             for kind in (L,I,D):
                 self.blockType(kind)
             banner('Distance code parameters')
             self.NPOSTFIX, self.NDIRECT = self.verboseRead(DistanceParamAlphabet())
             self.readLiteralContextModes()
             banner('Context maps')
             self.cmaps = {}
             #keep the number of each kind of prefix tree for the last loop;
             #insert&copy has no context map: one tree per block type
             numberOfTrees = {I: self.numberOfBlockTypes[I]}
             for kind in (L,D):
                 numberOfTrees[kind] = self.contextMap(kind)
             banner('Prefix code lists')
             self.prefixCodes = {}
             for kind in (L,I,D):
                 self.readPrefixArray(kind, numberOfTrees[kind])
             self.metablock()

     #metablock header
     def verboseRead(self, alphabet, context='', skipExtra=False):
         """Read symbol and extra from stream and explain what happens.
         Returns the value of the symbol; with skipExtra, no extra bits
         are read, and the symbol itself is shown and returned.
         >>> olleke.pos = 0
         >>> l = Layout(olleke)
         >>> l.verboseRead(WindowSizeAlphabet())
         0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
         4194288
         """
         #TODO 2: verbosity level, e.g. show only codes and maps in header
         stream = self.stream
         startPos = stream.pos
         if not skipExtra:
             length, symbol, extraBits, extra = alphabet.readTupleAndExtra(
                 stream)
         else:
             length, symbol = alphabet.readTuple(stream)
             extraBits, extra = 0, None
         #fields: address, hex data, binary data, name of alphabet, explanation
         hexdata = self.makeHexData(startPos)
         #the address is only shown if there is hex data to go with it
         if hexdata:
             addressField = '{:04x}'.format((startPos+7)>>3)
         else:
             addressField = ''
         bitdata = self.formatBitData(startPos, length, extraBits)
         #bitPtr moves bitdata so that the bytes are easier to read
         #jump back to right if a new byte starts
         if '|' in bitdata[1:]:
             #start over on the right side
             self.bitPtr = self.width
         #fill the space between hex data and bit data, if there is any
         fillWidth = max(self.bitPtr-(len(hexdata)+len(bitdata)), 0)
         dataField = hexdata+' '*fillWidth+bitdata
         nameField = context+alphabet.name
         explanation = symbol if skipExtra else symbol.explanation(extra)
         print('{:<5s} {:<{}s} {:7s} {}'.format(
             addressField,
             dataField, self.width,
             nameField,
             explanation,
             ))
         #jump to the right if we started with a '|'
         #because we didn't jump before printing
         if bitdata.startswith('|'):
             self.bitPtr = self.width
         else:
             self.bitPtr -= len(bitdata)
         if skipExtra:
             return symbol
         return symbol.value(extra)

     def metablockLength(self):
         """Read the meta block length into self.MLEN.
         Returns False when the length is nonzero.
         For an empty block (length zero) the reserved bit, the skip
         length and the filler bits are read as well, the stream is moved
         past the skipped bytes, the new byte position is printed
         and True is returned.
         """
         self.MLEN = self.verboseRead(MetablockLengthAlphabet())
         if not self.MLEN:
             #empty block: step over its contents and tell the caller
             self.verboseRead(ReservedAlphabet())
             skipBytes = self.verboseRead(SkipLengthAlphabet())
             self.verboseRead(FillerAlphabet(streamPos=self.stream.pos))
             self.stream.pos += 8*skipBytes
             print("Skipping to {:x}".format(self.stream.pos>>3))
             return True
         return False

     def uncompressed(self):
         """Read the "is uncompressed" flag and return it.
         When the flag is set, the filler bits are read, self.MLEN bytes
         are taken from the stream and added to self.output,
         and those bytes are printed.
         """
         isRaw = self.verboseRead(
             BoolCode('UNCMPR', description='Is uncompressed?'))
         if not isRaw: return isRaw
         #raw data follows the filler bits
         self.verboseRead(FillerAlphabet(streamPos=self.stream.pos))
         print('Uncompressed data:')
         self.output += self.stream.readBytes(self.MLEN)
         print(outputFormatter(self.output[-self.MLEN:]))
         return isRaw

     def blockType(self, kind):
         """Read the block type descriptor for one kind (L, I or D).
         Stores the number of block types in numberOfBlockTypes[kind].
         With two or more types, the block type code and block count code
         are read into blockTypeCodes[kind] and blockCountCodes[kind],
         and the first block count is read from the stream.
         With fewer types nothing more is read and the count is 1<<24.
         The count ends up in currentBlockCounts[kind].
         """
         tag = kind[0].upper()
         typeCount = self.verboseRead(TypeCountAlphabet(
             'BT#'+tag,
             description='{} block types'.format(kind),
             ))
         self.numberOfBlockTypes[kind] = typeCount
         if typeCount<2:
             #no codes in the stream: use a fixed count
             firstCount = 1<<24
         else:
             self.blockTypeCodes[kind] = self.readPrefixCode(
                 BlockTypeAlphabet('BT'+tag, typeCount))
             self.blockCountCodes[kind] = self.readPrefixCode(
                 BlockCountAlphabet('BC'+tag))
             firstCount = self.verboseRead(self.blockCountCodes[kind])
         self.currentBlockCounts[kind] = firstCount

     def readLiteralContextModes(self):
         """Read one context mode for each literal block type.
         Prints a section header and stores the list of modes in
         self.literalContextModes.
         The modes, in the words of the original notes:
         LSB6: lower 6 bits of last char
         MSB6: upper 6 bits of last char
         UTF8: roughly dependent on categories:
             upper 4 bits depend on category of last char:
                 control/whitespace/space/ punctuation/quote/%/open/close/
                 comma/period/=/digits/ VOWEL/CONSONANT/vowel/consonant
             lower 2 bits depend on category of 2nd last char:
                 space/punctuation/digit or upper/lowercase
         signed: hamming weight of last 2 chars
         """
         print('Context modes'.center(60, '-'))
         self.literalContextModes = [
             self.verboseRead(LiteralContextMode(number=blockTypeNr))
             for blockTypeNr in range(self.numberOfBlockTypes[L])
             ]

     def contextMap(self, kind):
         """Read the context map for one kind and store it in cmaps[kind].
         kind: L (64 entries per block type) or D (4 entries per block type).
         The map always gets one entry for every context of every block
         type, also when there is only one prefix tree (all entries 0 then).
         With more trees, the run length coded map is read from the
         stream, optionally passed through the inverse move to front
         transform, and printed.
         Returns the number of different values on the context map
         (in other words, the number of prefix trees).
         Raises InvalidStream if the runs do not end exactly at the map size.
         """
         NTREES = self.verboseRead(TypeCountAlphabet(
             kind[0].upper()+'T#',
             description='{} prefix trees'.format(kind)))
         mapSize = {L:64, D:4}[kind]
         #the map is indexed by blockType*mapSize+context (see metablock),
         #so its size depends on the number of block types
         tableSize = mapSize*self.numberOfBlockTypes[kind]
         if NTREES<2:
             #nothing in the stream: everything maps to tree 0
             self.cmaps[kind] = [0]*tableSize
         else:
             #read CMAPkind
             RLEMAX = self.verboseRead(RLEmaxAlphabet(
                 'RLE#'+kind[0].upper(),
                 description=kind+' context map'))
             alphabet = TreeAlphabet('CM'+kind[0].upper(), NTREES=NTREES, RLEMAX=RLEMAX)
             cmapCode = self.readPrefixCode(alphabet)
             cmap = []
             while len(cmap)<tableSize:
                 #the description tells which map and entry comes next
                 cmapCode.description = 'map {}, entry {}'.format(
                     *divmod(len(cmap), mapSize))
                 count, value = self.verboseRead(cmapCode)
                 cmap.extend([value]*count)
             #a run that passes the end of the map is an error in the data;
             #raise explicitly, since an assert disappears under -O
             if len(cmap)!=tableSize:
                 raise InvalidStream(
                     'context map has {} entries, expected {}'.format(
                         len(cmap), tableSize))
             IMTF = self.verboseRead(BoolCode('IMTF', description='Apply inverse MTF'))
             if IMTF:
                 self.IMTF(cmap)
             if kind==L:
                 #one line per block type: 8 groups of 8 entries
                 print('Context maps for literal data:')
                 for i in range(0, len(cmap), 64):
                     print(*(
                         ''.join(map(str, cmap[j:j+8]))
                         for j in range(i, i+64, 8)
                         ))
             else:
                 #one group of 4 hex digits per block type
                 print('Context map for distances:')
                 print(*(
                     ''.join(map('{:x}'.format, cmap[i:i+4]))
                     for i in range(0, len(cmap), 4)
                     ))
             self.cmaps[kind] = cmap
         return NTREES

     @staticmethod
     def IMTF(v):
         """In place inverse move to front transform.
         """
         #mtf is initialized virtually with range(infinity)
         mtf = []
         for i, vi in enumerate(v):
             #get old value from mtf. If never seen, take virtual value
             try: value = mtf.pop(vi)
             except IndexError: value = vi
             #put value at front
             mtf.insert(0, value)
             #replace transformed value
             v[i] = value

     def readPrefixArray(self, kind, numberOfTrees):
         """Read numberOfTrees prefix codes for the given kind (L, I or D).
         For every tree number an alphabet of the matching class is made
         and handed to readPrefixCode (whose return value is not used);
         the alphabets are collected in self.prefixCodes[kind].
         """
         codes = []
         for treeNr in range(numberOfTrees):
             if kind==D:
                 #distance alphabets also need the distance parameters
                 alphabet = DistanceAlphabet(
                     treeNr, NPOSTFIX=self.NPOSTFIX, NDIRECT=self.NDIRECT)
             elif kind==I:
                 alphabet = InsertAndCopyAlphabet(treeNr)
             elif kind==L:
                 alphabet = LiteralAlphabet(treeNr)
             self.readPrefixCode(alphabet)
             codes.append(alphabet)
         self.prefixCodes[kind] = codes

     #metablock data
     def metablock(self):
         """Process the data of one compressed meta block, printing each step.
         Until MLEN bytes are produced: read an insert&copy command,
         read its literals, then (unless the command implies the last
         distance) read a distance, and copy either from the output so
         far or from the word list.
         Relevant variables of self:
         numberOfBlockTypes[kind]: number of block types
         currentBlockTypes[kind]: current block types (=0)
         literalContextModes: the context modes for the literal block types
         currentBlockCounts[kind]: counters for block types
         blockTypeCodes[kind]: code for block type
         blockCountCodes[kind]: code for block count
         cmaps[kind]: the context maps (not for I)
         prefixCodes[kind][#]: the prefix codes
         lastDistances: the last four distances
         lastChars: the last two chars
         output: the result
         """
         print('Meta block contents'.center(60, '='))
         #the "previous" block type of each kind starts out as 1
         self.currentBlockTypes = {L:0, I:0, D:0, pL:1, pI:1, pD:1}
         #RFC 7932 section 4: last distance 4, before that 11, 15 and 16
         #NOTE(review): the RFC sets these up once per stream, while this
         #method sets them on every call - confirm against the caller
         self.lastDistances = deque([16,15,11,4], maxlen=4)
         #the current context mode is for block type 0
         self.contextMode = ContextModeKeeper(self.literalContextModes[0])
         wordList = WordList()

         #setup distance callback function
         #maxDistance and copyLen are looked up when the callback runs,
         #so it sees the values of the command being decoded
         def distanceCallback(symbol, extra):
             "callback function for displaying decoded distance"
             index, offset = symbol.value(extra)
             if index:
                 #recent distance
                 distance = self.lastDistances[-index]+offset
                 return 'Distance: {}last{:+d}={}'.format(index, offset, distance)
             #absolute value; position counted from the start of the output
             if offset<=maxDistance:
                 return 'Absolute value: {} (pos {})'.format(
                     offset, len(self.output)-offset)
             #word list value; same index as given to wordList.word below
             action, word = divmod(
                 offset-maxDistance-1, 1<<wordList.NDBITS[copyLen])
             return '{}-{} gives word {},{} action {}'.format(
                 offset, maxDistance, copyLen, word, action)
         for dpc in self.prefixCodes[D]: dpc.callback = distanceCallback

         blockLen = 0
         #there we go
         while blockLen<self.MLEN:
             #get insert&copy command
             litLen, copyLen, dist0Flag = self.verboseRead(
                 self.prefixCodes[I][
                     self.figureBlockType(I)])
             #literal data
             for i in range(litLen):
                 bt = self.figureBlockType(L)
                 cm = self.contextMode.getIndex()
                 #64 contexts per literal block type
                 ct = self.cmaps[L][bt<<6|cm]
                 char = self.verboseRead(
                     self.prefixCodes[L][ct],
                     context='{},{}='.format(bt,cm))
                 self.contextMode.add(char)
                 self.output.append(char)
             blockLen += litLen
             #check if we're done
             if blockLen>=self.MLEN: return
             #distance
             #distances are computed relative to output length, at most window size
             maxDistance = min(len(self.output), self.windowSize)
             if dist0Flag:
                 distance = self.lastDistances[-1]
             else:
                 bt = self.figureBlockType(D)
                 #4 contexts per distance block type, chosen by copy length
                 cm = {2:0, 3:1, 4:2}.get(copyLen, 3)
                 ct = self.cmaps[D][bt<<2|cm]
                 index, offset = self.verboseRead(
                     self.prefixCodes[D][ct],
                     context='{},{}='.format(bt,cm))
                 distance = self.lastDistances[-index]+offset if index else offset
                 if index==1 and offset==0:
                     #to make sure distance is not put in last distance list
                     dist0Flag = True
             if distance<=maxDistance:
                 #copy from output, starting distance bytes before its end
                 #(maxDistance can't be used here: it stops growing
                 #at the window size, the output does not)
                 start = len(self.output)-distance
                 for i in range(start, start+copyLen):
                     #one byte at a time, so source and target may overlap
                     self.output.append(self.output[i])
                 if not dist0Flag: self.lastDistances.append(distance)
                 comment = 'Seen before'
             else:
                 #fetch from wordlist
                 newWord = wordList.word(copyLen, distance-maxDistance-1)
                 self.output.extend(newWord)
                 #adjust copyLen to reflect actual new data
                 copyLen = len(newWord)
                 comment = 'From wordlist'
             blockLen += copyLen
             print(' '*40,
                 comment,
                 ': "',
                 outputFormatter(self.output[-copyLen:]),
                 '"',
                 sep='')
             #the context for the next literal uses the last two bytes
             self.contextMode.add(self.output[-2])
             self.contextMode.add(self.output[-1])

     def figureBlockType(self, kind):
         counts, types = self.currentBlockCounts, self.currentBlockTypes
         if counts[kind]==0:
             newType = self.verboseRead(self.blockTypeCodes[kind])
             if newType==-2: newType = types['P'+kind]
             elif newType==-1:
                 newType = (types[kind]+1)%self.numberOfBlockTypes[kind]
             types['P'+kind] = types[kind]
             types[kind] = newType
             counts[kind] = self.verboseRead(self.blockCountCodes[kind])
         counts[kind] -=1
         return types[kind]

 #Extra doctests, keyed by a display name. doctest.testmod (called in the
 #__main__ section) runs the strings of a module level __test__ dict
 #in addition to the docstrings.
 #Several entries use the global olleke, which the __main__ section
 #only creates when the brotli module can be imported.
 __test__ = {
 'BitStream': """
     >>> bs = BitStream(b'Jurjen')
     >>> bs.readBytes(2)
     b'Ju'
     >>> bs.read(6) #r=01110010
     50
     >>> bs
     BitStream(pos=2:6)
     >>> bs.peek(5)  #j=01101010
     9
     >>> bs.readBytes(2)
     Traceback (most recent call last):
         ...
     ValueError: readBytes: need byte boundary
     """,

 'Symbol': """
     >>> a=Symbol(MetablockLengthAlphabet(),5)
     >>> len(a)
     2
     >>> int(a)
     5
     >>> a.bitPattern()
     '01'
     >>> a.value(200000)
     200001
     >>> a.explanation(300000)
     'data length: 493e0h+1=300001'
     """,

 'RangeDecoder': """
     >>> a=RangeDecoder(bitLength=3)
     >>> len(a)
     8
     >>> a.name='t'
     >>> list(a)
     [Symbol(t, 0), Symbol(t, 1), Symbol(t, 2), Symbol(t, 3), Symbol(t, 4), Symbol(t, 5), Symbol(t, 6), Symbol(t, 7)]
     >>> a[2]
     Symbol(t, 2)
     >>> a.bitPattern(4)
     '100'
     >>> a.length(2)
     3
     >>> a.decodePeek(15)
     (3, Symbol(t, 7))
     >>>

     """,

 'PrefixDecoder': """
     >>> a=PrefixDecoder(decodeTable={0:1,1:2,3:3,7:4})
     >>> len(a)
     4
     >>> a.name='t'
     >>> list(a)
     [Symbol(t, 1), Symbol(t, 2), Symbol(t, 3), Symbol(t, 4)]
     >>> a.decodePeek(22)
     (1, Symbol(t, 1))
     >>> a.decodePeek(27)
     (3, Symbol(t, 3))
     >>> a.length(1)
     1
     >>> a.length(4)
     3
     """,

 'Code': """
     >>> a=Code('t',alphabetSize=10)
     >>> len(a)
     10
     >>> a.showCode()
     0000:0 0001:1 0010:2 0011:3 0100:4 0101:5 0110:6 0111:7 1000:8 1001:9
     >>> a.setLength({2:1,3:2,5:3,6:3})
     >>> a.showCode()
       0:2  01:3 011:5 111:6
     >>> len(a)
     4
     >>> def callback(i): return 'call{}back'.format(i)
     >>> a=Code('t',callback=callback,bitLength=3)
     >>> a[6].explanation()
     'call6back'
     """,

 'WithExtra': """
     >>> class A(WithExtra):
     ...    extraTable = [0,1,1,2,2]
     >>> a=A('t',alphabetSize=5)
     >>> a[1]
     Symbol(t, 1)
     >>> a.extraBits(2)
     1
     >>> a.mnemonic(4)
     '4'
     >>> a.readTupleAndExtra(BitStream(b'\x5b'))
     (3, Symbol(t, 3), 2, 3)
     """,

 'BoolCode': """
     >>> BoolCode('test')[0].explanation()
     '0: False'
     """,

 'Enumerator': """
     >>> class A(Enumerator):
     ...    extraTable = [0,1,1,2,2]
     ...    value0=3
     >>> a=A(alphabetLength=5)
     >>> a.value(3)
     Traceback (most recent call last):
         ...
     TypeError: value() missing 1 required positional argument: 'extra'
     >>> a.explanation(3,4)
     'xx 011: 8-11; 8+4=12'
     """,

 'WindowSizeAlphabet': """
     >>> windowSizeAlphabet = WindowSizeAlphabet()
     >>> windowSizeAlphabet[0]
     Traceback (most recent call last):
         ...
     ValueError: No symbol WindowSizeAlphabet[0]
     >>> len(windowSizeAlphabet)
     16
     >>> windowSizeAlphabet[21]
     Symbol(WSIZE, 21)
     >>> windowSizeAlphabet[21].bitPattern()
     '1001'
     >>> windowSizeAlphabet[21].extraBits()
     0
     >>> windowSizeAlphabet[21].index
     21
     >>> windowSizeAlphabet[10].value()
     1008
     >>> windowSizeAlphabet[10].explanation()
     'windowsize=(1<<10)-16=1008'
     >>> windowSizeAlphabet.showCode()
           0:65520    1100001:16368    1110001:32752       0011:262128
     0000001:131056   0010001:None        1001:2097136     1011:4194288
     1000001:4080     1010001:8176        0101:524272      0111:1048560
     0100001:1008     0110001:2032        1101:8388592     1111:16777200
     """,

 'TypeCountAlphabet': """
     >>> typeCountAlphabet = TypeCountAlphabet(description='bananas')
     >>> len(typeCountAlphabet)
     9
     >>> typeCountAlphabet[3]
     Symbol(BT#, 3)
     >>> typeCountAlphabet[9]
     Traceback (most recent call last):
         ...
     ValueError: No symbol TypeCountAlphabet[9]
     >>> print(typeCountAlphabet[3])
     xx,0101
     >>> typeCountAlphabet[8].value(127)
     256
     >>> typeCountAlphabet[4].explanation(2)
     'xxx,0111: 11 bananas'
     >>> typeCountAlphabet[0].explanation()
     '0: 1 banana'
     """,

 'DistanceParamAlphabet': """
     >>> dpa = DistanceParamAlphabet()
     >>> dpa.showCode()
     00:PF0 01:PF1 10:PF2 11:PF3
     >>> dpa.readTupleAndExtra(BitStream(b'\\x29'))
     (2, Symbol(DIST, 1), 4, 10)
     >>> dpa.explanation(2, 5)
     '2 postfix bits and 0101<<2=20 direct codes'
     """,

 'LiteralAlphabet': """
     >>> LiteralAlphabet(-1).showCode()   #doctest: +ELLIPSIS
     00000000:\\x00 00110100:4    01101000:h    10011100:\\x9c 11010000:\\xd0
     00000001:\\x01 00110101:5    01101001:i    10011101:\\x9d 11010001:\\xd1
     00000010:\\x02 00110110:6    01101010:j    10011110:\\x9e 11010010:\\xd2
     ...
     00101111:/    01100011:c    10010111:\\x97 11001011:\\xcb 11111111:\\xff
     00110000:0    01100100:d    10011000:\\x98 11001100:\\xcc
     00110001:1    01100101:e    10011001:\\x99 11001101:\\xcd
     00110010:2    01100110:f    10011010:\\x9a 11001110:\\xce
     00110011:3    01100111:g    10011011:\\x9b 11001111:\\xcf
     """,

 'BlockCountAlphabet': """
     >>> bc=BlockCountAlphabet('BCL')
     >>> len(bc)
     26
     >>> bs=BitStream(b'\\x40\\x83\\xc8\\x59\\12\\x02')
     >>> x = bc.readTupleAndExtra(bs); x[1].explanation(x[3])
     'Block count: xx 00000: 1-4; 1+2=3'
     >>> x = bc.readTupleAndExtra(bs); x[1].explanation(x[3])
     'Block count: xxx 00110: 33-40; 33+0=33'
     >>> x = bc.readTupleAndExtra(bs); x[1].explanation(x[3])
     'Block count: xxxxxx 10001: 305-368; 305+28=333'
     >>> x = bc.readTupleAndExtra(bs); x[1].explanation(x[3])
     'Block count: xxxxxxxxxxx 10110: 2289-4336; 2289+1044=3333'
     """,

 #uses the global olleke (see the comment above the dict)
 'Layout': """
     >>> olleke.pos = 0
     >>> l = Layout(olleke)
     >>> l.verboseRead(WindowSizeAlphabet())
     0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
     4194288
     >>> l.verboseRead(BoolCode('LAST', description="Last block"))
                               1     LAST    Last block: 1: True
     True
     >>> l.verboseRead(BoolCode('EMPTY', description="Empty block"))
                              0      EMPTY   Empty block: 0: False
     False
     >>> l.verboseRead(MetablockLengthAlphabet())
     0001  2e 00        |00h|2Eh,|00 MLEN    data length: 002eh+1=47
     47
     >>> olleke.pos = 76
     >>> l = Layout(olleke)
     >>> x = l.verboseRead(DistanceAlphabet(0,NPOSTFIX=0,NDIRECT=0), skipExtra=True)
     000a  82                10|1100 D0      10[15*x]-3
     >>> x.explanation(0x86a3)
     '10[1000011010100011]-3: [0]+100000'
     """,

 #full dump of the global olleke
 'olleke': """
     >>> olleke.pos = 0
     >>> try: Layout(olleke).processStream()
     ... except NotImplementedError: pass
     ... #doctest: +REPORT_NDIFF
     addr  hex               binary context explanation
     -----------------------Stream header------------------------
     0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
     ======================Metablock header======================
                               1     LAST    Last block: 1: True
                              0      EMPTY   Empty block: 0: False
     0001  2e 00        |00h|2Eh,|00 MLEN    data length: 002eh+1=47
     -------------------Block type descriptors-------------------
     0003  00                      0 BT#L    0: 1 literal block type
                                  0  BT#I    0: 1 insert&copy block type
                                 0   BT#D    0: 1 distance block type
     ------------------Distance code parameters------------------
     0004  44               0|000,00 DIST    0 postfix bits and 0000<<0=0 direct codes
     -----------------------Context modes------------------------
                          10         LC0     Context mode for type 0: 2(UTF8)
     ------------------------Context maps------------------------
                         0           LT#     0: 1 literal prefix tree
                        0            DT#     0: 1 distance prefix tree
     ---------------------Prefix code lists----------------------
                      10             PFX     L0 is complex with lengths 3,4,0,5,17...
     0005  4f                    1|0 ##L0    len 3: coded with 3 bits
                             0111    ##L0    len 4: coded with 1 bits
                           10        ##L0    unused: coded with 3 bits
     0006  d6                    0|0 ##L0    len 5: skipped
                              011    ##L0    zero xxx: coded with 2 bits
     ***** Lengths for L0 will be coded as:
       0:len 4     01:zero xxx 011:unused   111:len 3
     0007  95                1|11,01 #L0     7+3 unused
                            0        #L0     Length for \\n is 4 bits
                      001,01         #L0     1+3 unused
     0008  44                010,0|1 #L0     total 19+2 unused
                            0        #L0     Length for " " is 4 bits
                           0         #L0     Length for ! is 4 bits
     0009  cb                011,|01 #L0     3+3 unused
                      |110,01        #L0     total 35+6 unused
     000a  82                      0 #L0     Length for K is 4 bits
                             000,01  #L0     0+3 unused
                            0        #L0     Length for O is 4 bits
     000b  4d                   01|1 #L0     symbol P unused
                             011     #L0     symbol Q unused
                            0        #L0     Length for R is 4 bits
     000c  88                000,|01 #L0     0+3 unused
                      |100,01        #L0     total 11+4 unused
     000d  b6                      0 #L0     Length for b is 4 bits
                                011  #L0     symbol c unused
                             011     #L0     symbol d unused
     000e  27                   11|1 #L0     Length for e is 3 bits
                          010,01     #L0     2+3 unused
                        |0           #L0     Length for k is 4 bits
     000f  1f                    111 #L0     Length for l is 3 bits
                              011    #L0     symbol m unused
                             0       #L0     Length for n is 4 bits
                           |0        #L0     Length for o is 4 bits
     0010  c1                 000,01 #L0     0+3 unused
                             0       #L0     Length for s is 4 bits
     0011  b4                   0|11 #L0     symbol t unused
                               0     #L0     Length for u is 4 bits
     End of table. Prefix code L0:
      000:e   0010:\\n  0110:!   0001:O   0101:b   0011:n   0111:s
      100:l   1010:" " 1110:K   1001:R   1101:k   1011:o   1111:u
                          11,01      PFX     IC0 is simple with 4 code words
     0012  2a                |2Ah|10 IC0     ? bits: I5C4
     0013  b5 ec              00|B5h IC0     ? bits: I6+xC7
     0015  22            0010|111011 IC0     ? bits: I8+xC5
     0016  8c            001100|0010 IC0     ? bits: I0C14+xx
                        0            SHAPE   False: lengths 2,2,2,2
     0017  74                 10,0|1 PFX     D0 is simple with 3 code words
     0018  a6                0|01110 D0      1 bit: 2last-3
                       010011        D0      2 bits: 11xx-3
     0019  aa                01010|1 D0      2 bits: 11xxx-3
     ====================Meta block contents=====================
                        |1,01        IC0     Literal: 9, copy: 5
     001a  41                   0001 0,0=L0  O
                             100     0,48=L0 l
     001b  a2                   10|0 0,62=L0 l
                             000     0,63=L0 e
     001c  a1                  1|101 0,59=L0 k
                            000      0,63=L0 e
                       |1010         0,59=L0 " "
     001d  b5                   0101 0,11=L0 b
                           |1011     0,60=L0 o
     001e  24                      0 0,3=D0  Distance: 2last-3=8
                                             Seen before: "lleke"
                               0,10  IC0     Literal: 6, copy: 7
                          |0010      0,59=L0 \\n
     001f  89                   1001 0,7=L0  R
                             000     0,52=L0 e
     0020  fa                  010|1 0,58=L0 b
                           1111      0,63=L0 u
     0021  eb                  011|1 0,59=L0 s
                          11,01      0,3=D0  Absolute value: 12 (pos 8)
                                             Seen before: "olleke\\n"
     0022  db                 01,1|1 IC0     Literal: 0, copy: 15
                       |110,11       0,3=D0  Absolute value: 27 (pos 0)
                                             Seen before: "Olleke bolleke\\n"
     0023  f8                     00 IC0     Literal: 5, copy: 4
                              1110   0,7=L0  K
     0024  2c                  00|11 0,52=L0 n
                           1011      0,62=L0 o
     0025  0d                   1|00 0,59=L0 l
                            0110     0,63=L0 !
     """,

 #NOTE: reads its input from a hard-coded absolute path (drive H:),
 #so it can only pass where that file exists
 'file': """
     >>> try: Layout(BitStream(
     ... open("H:/Downloads/brotli-master/tests/testdata/10x10y.compressed",'rb')
     ...     .read())).processStream()
     ... except NotImplementedError: pass
     addr  hex               binary context explanation
     -----------------------Stream header------------------------
     0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
     ======================Metablock header======================
                               1     LAST    Last block: 1: True
                              0      EMPTY   Empty block: 0: False
     0001  13 00        |00h|13h,|00 MLEN    data length: 0013h+1=20
     -------------------Block type descriptors-------------------
     0003  00                      0 BT#L    0: 1 literal block type
                                  0  BT#I    0: 1 insert&copy block type
                                 0   BT#D    0: 1 distance block type
     ------------------Distance code parameters------------------
     0004  a4               0|000,00 DIST    0 postfix bits and 0000<<0=0 direct codes
     -----------------------Context modes------------------------
                          10         LC0     Context mode for type 0: 2(UTF8)
     ------------------------Context maps------------------------
                         0           LT#     0: 1 literal prefix tree
                        0            DT#     0: 1 distance prefix tree
     ---------------------Prefix code lists----------------------
     0005  b0                 0|1,01 PFX     L0 is simple with 2 code words
     0006  b2              0|1011000 L0      1 bit: X
     0007  ea              0|1011001 L0      1 bit: Y
                      01,01          PFX     IC0 is simple with 2 code words
     0008  81            0000001|111 IC0     1 bit: I1C9&D=0
     0009  47 02             0|47h|1 IC0     1 bit: I1C9
                        00,01        PFX     D0 is simple with 1 code word
     000b  8a                010|000 D0      0 bits: 10x-3
     ====================Meta block contents=====================
                            1        IC0     Literal: 1, copy: 9
                           0         0,0=L0  X
                       0,()          0,3=D0  Absolute value: 1 (pos 0)
                                             Seen before: "XXXXXXXXX"
                      0              IC0     Literal: 1, copy: 9, same distance
                    |1               0,54=L0 Y
                                             Seen before: "YYYYYYYYY"
     """,

 #uses the global brotli, which the __main__ section imports
 'XY': """
     >>> try: Layout(BitStream(brotli.compress('X'*10+'Y'*10))).processStream()
     ... except NotImplementedError: pass
     addr  hex               binary context explanation
     -----------------------Stream header------------------------
     0000  1b                   1011 WSIZE   windowsize=(1<<22)-16=4194288
     ======================Metablock header======================
                               1     LAST    Last block: 1: True
                              0      EMPTY   Empty block: 0: False
     0001  13 00        |00h|13h,|00 MLEN    data length: 0013h+1=20
     -------------------Block type descriptors-------------------
     0003  00                      0 BT#L    0: 1 literal block type
                                  0  BT#I    0: 1 insert&copy block type
                                 0   BT#D    0: 1 distance block type
     ------------------Distance code parameters------------------
     0004  a4               0|000,00 DIST    0 postfix bits and 0000<<0=0 direct codes
     -----------------------Context modes------------------------
                          10         LC0     Context mode for type 0: 2(UTF8)
     ------------------------Context maps------------------------
                         0           LT#     0: 1 literal prefix tree
                        0            DT#     0: 1 distance prefix tree
     ---------------------Prefix code lists----------------------
     0005  b0                 0|1,01 PFX     L0 is simple with 2 code words
     0006  b2              0|1011000 L0      1 bit: X
     0007  82              0|1011001 L0      1 bit: Y
                      00,01          PFX     IC0 is simple with 1 code word
     0008  84            0000100|100 IC0     0 bits: I4C6&D=0
     0009  00                 00,0|1 PFX     D0 is simple with 1 code word
     000a  e0                0|00000 D0      0 bits: last
     ====================Meta block contents=====================
                           ()        IC0     Literal: 4, copy: 6, same distance
                          0          0,0=L0  X
                         0           0,52=L0 X
                        0            0,54=L0 X
                       0             0,54=L0 X
                                             Seen before: "XXXXXX"
                     ()              IC0     Literal: 4, copy: 6, same distance
                    1                0,54=L0 Y
                   1                 0,54=L0 Y
                 |1                  0,54=L0 Y
     000b  01                      1 0,54=L0 Y
                                             Seen before: "YYYYYY"
     """,

 #a stream whose only meta block is empty and skips one byte
 'empty': """
     >>> try: Layout(BitStream(b'\\x81\\x16\\x00\\x58')).processStream()
     ... except NotImplementedError: pass
     addr  hex               binary context explanation
     -----------------------Stream header------------------------
     0000  81                0000001 WSIZE   windowsize=(1<<17)-16=131056
     ======================Metablock header======================
                           |1        LAST    Last block: 1: True
     0001  16                      0 EMPTY   Empty block: 0: False
                                 11  MLEN    11: empty block
                                0    RSVD    Reserved (must be zero)
     0002  00           000000|00,01 SKIP    skip length: 0h+1=1
                     |00             SKIP    2 bits ignored
     Skipping to 4
     """,

 }

 if __name__=='__main__':
     #With a file name as argument: dump that Brotli file.
     #Without arguments: run the doctests of this module.
     import sys
     if len(sys.argv)>1:
         #read everything and close the file before the dump starts
         with open(sys.argv[1],'rb') as compressedFile:
             l = Layout(BitStream(compressedFile.read()))
         l.processStream()
     else:
         sys.path.append("h:/Persoonlijk/bin")
         try:
             import brotli
             #brotli.compress works on bytes, so the source is read in
             #binary mode; it is read before the target file is opened,
             #so a failing read leaves an existing brotlidump.br alone
             with open('brotlidump.py', 'rb') as sourceFile:
                 compressedSource = brotli.compress(sourceFile.read())
             with open('brotlidump.br', 'wb') as targetFile:
                 targetFile.write(compressedSource)
             #test stream used by the doctests
             olleke = BitStream(brotli.compress(
                 b'Olleke bolleke\nRebusolleke\nOlleke bolleke\nKnol!'))
         #without brotli the doctests still run;
         #the ones that need olleke or brotli will then fail
         except ImportError: pass
         import doctest
         doctest.testmod(optionflags=doctest.REPORT_NDIFF
             #|doctest.FAIL_FAST
             )