| # udis86 - scripts/itab.py |
| # |
| # Copyright (c) 2009 Vivek Thampi |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without modification, |
| # are permitted provided that the following conditions are met: |
| # |
| # * Redistributions of source code must retain the above copyright notice, |
| # this list of conditions and the following disclaimer. |
| # * Redistributions in binary form must reproduce the above copyright notice, |
| # this list of conditions and the following disclaimer in the documentation |
| # and/or other materials provided with the distribution. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
| # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| from optparse import OptionParser |
| import os |
| import sys |
| |
| sys.path.append( '../scripts' ); |
| |
| import ud_optable |
| import ud_opcode |
| |
class UdItabGenerator(ud_opcode.UdOpcodeTables):
    """Writes the instruction table sources udis86_itab.c and udis86_itab.h.

    The opcode tables that are walked here (``OpcodeTable0``,
    ``MnemonicsTable``, ``TableInfo``) and the helpers ``sizeOfTable`` /
    ``nameOfTable`` are not defined in this class; they are expected to be
    provided by ``ud_opcode.UdOpcodeTables``.
    """

    #
    # operand dictionary: operand declaration -> [ operand type, operand size ]
    # Each entry is emitted into the header as a "#define O_<name> { type, size }"
    # macro by genItabH() and referenced as O_<name> by genInsnTable().
    #
    OperandDict = {
        "Ap":     ["OP_A",      "SZ_P"],
        "E":      ["OP_E",      "SZ_NA"],
        "Eb":     ["OP_E",      "SZ_B"],
        "Ew":     ["OP_E",      "SZ_W"],
        "Ev":     ["OP_E",      "SZ_V"],
        "Ed":     ["OP_E",      "SZ_D"],
        "Eq":     ["OP_E",      "SZ_Q"],
        "Ez":     ["OP_E",      "SZ_Z"],
        "Ex":     ["OP_E",      "SZ_MDQ"],
        "Ep":     ["OP_E",      "SZ_P"],
        "G":      ["OP_G",      "SZ_NA"],
        "Gb":     ["OP_G",      "SZ_B"],
        "Gw":     ["OP_G",      "SZ_W"],
        "Gv":     ["OP_G",      "SZ_V"],
        "Gy":     ["OP_G",      "SZ_MDQ"],
        "Gd":     ["OP_G",      "SZ_D"],
        "Gq":     ["OP_G",      "SZ_Q"],
        "Gx":     ["OP_G",      "SZ_MDQ"],
        "Gz":     ["OP_G",      "SZ_Z"],
        "M":      ["OP_M",      "SZ_NA"],
        "Mb":     ["OP_M",      "SZ_B"],
        "Mw":     ["OP_M",      "SZ_W"],
        "Ms":     ["OP_M",      "SZ_W"],
        "Md":     ["OP_M",      "SZ_D"],
        "Mq":     ["OP_M",      "SZ_Q"],
        "Mt":     ["OP_M",      "SZ_T"],
        "Mo":     ["OP_M",      "SZ_O"],
        "MwRv":   ["OP_MR",     "SZ_WV"],
        "MdRy":   ["OP_MR",     "SZ_DY"],
        "MbRv":   ["OP_MR",     "SZ_BV"],
        "I1":     ["OP_I1",     "SZ_NA"],
        "I3":     ["OP_I3",     "SZ_NA"],
        "Ib":     ["OP_I",      "SZ_B"],
        "Isb":    ["OP_I",      "SZ_SB"],
        "Iw":     ["OP_I",      "SZ_W"],
        "Iv":     ["OP_I",      "SZ_V"],
        "Iz":     ["OP_I",      "SZ_Z"],
        "Jv":     ["OP_J",      "SZ_V"],
        "Jz":     ["OP_J",      "SZ_Z"],
        "Jb":     ["OP_J",      "SZ_B"],
        "R":      ["OP_R",      "SZ_RDQ"],
        "C":      ["OP_C",      "SZ_NA"],
        "D":      ["OP_D",      "SZ_NA"],
        "S":      ["OP_S",      "SZ_NA"],
        "Ob":     ["OP_O",      "SZ_B"],
        "Ow":     ["OP_O",      "SZ_W"],
        "Ov":     ["OP_O",      "SZ_V"],
        "V":      ["OP_V",      "SZ_O"],
        "W":      ["OP_W",      "SZ_O"],
        "Wsd":    ["OP_W",      "SZ_O"],
        "Wss":    ["OP_W",      "SZ_O"],
        "P":      ["OP_P",      "SZ_Q"],
        "Q":      ["OP_Q",      "SZ_Q"],
        "VR":     ["OP_VR",     "SZ_O"],
        "PR":     ["OP_PR",     "SZ_Q"],
        "AL":     ["OP_AL",     "SZ_NA"],
        "CL":     ["OP_CL",     "SZ_NA"],
        "DL":     ["OP_DL",     "SZ_NA"],
        "BL":     ["OP_BL",     "SZ_NA"],
        "AH":     ["OP_AH",     "SZ_NA"],
        "CH":     ["OP_CH",     "SZ_NA"],
        "DH":     ["OP_DH",     "SZ_NA"],
        "BH":     ["OP_BH",     "SZ_NA"],
        "AX":     ["OP_AX",     "SZ_NA"],
        "CX":     ["OP_CX",     "SZ_NA"],
        "DX":     ["OP_DX",     "SZ_NA"],
        "BX":     ["OP_BX",     "SZ_NA"],
        "SI":     ["OP_SI",     "SZ_NA"],
        "DI":     ["OP_DI",     "SZ_NA"],
        "SP":     ["OP_SP",     "SZ_NA"],
        "BP":     ["OP_BP",     "SZ_NA"],
        "eAX":    ["OP_eAX",    "SZ_NA"],
        "eCX":    ["OP_eCX",    "SZ_NA"],
        "eDX":    ["OP_eDX",    "SZ_NA"],
        "eBX":    ["OP_eBX",    "SZ_NA"],
        "eSI":    ["OP_eSI",    "SZ_NA"],
        "eDI":    ["OP_eDI",    "SZ_NA"],
        "eSP":    ["OP_eSP",    "SZ_NA"],
        "eBP":    ["OP_eBP",    "SZ_NA"],
        "rAX":    ["OP_rAX",    "SZ_NA"],
        "rCX":    ["OP_rCX",    "SZ_NA"],
        "rBX":    ["OP_rBX",    "SZ_NA"],
        "rDX":    ["OP_rDX",    "SZ_NA"],
        "rSI":    ["OP_rSI",    "SZ_NA"],
        "rDI":    ["OP_rDI",    "SZ_NA"],
        "rSP":    ["OP_rSP",    "SZ_NA"],
        "rBP":    ["OP_rBP",    "SZ_NA"],
        "ES":     ["OP_ES",     "SZ_NA"],
        "CS":     ["OP_CS",     "SZ_NA"],
        "DS":     ["OP_DS",     "SZ_NA"],
        "SS":     ["OP_SS",     "SZ_NA"],
        "GS":     ["OP_GS",     "SZ_NA"],
        "FS":     ["OP_FS",     "SZ_NA"],
        "ST0":    ["OP_ST0",    "SZ_NA"],
        "ST1":    ["OP_ST1",    "SZ_NA"],
        "ST2":    ["OP_ST2",    "SZ_NA"],
        "ST3":    ["OP_ST3",    "SZ_NA"],
        "ST4":    ["OP_ST4",    "SZ_NA"],
        "ST5":    ["OP_ST5",    "SZ_NA"],
        "ST6":    ["OP_ST6",    "SZ_NA"],
        "ST7":    ["OP_ST7",    "SZ_NA"],
        "NONE":   ["OP_NONE",   "SZ_NA"],
        "ALr8b":  ["OP_ALr8b",  "SZ_NA"],
        "CLr9b":  ["OP_CLr9b",  "SZ_NA"],
        "DLr10b": ["OP_DLr10b", "SZ_NA"],
        "BLr11b": ["OP_BLr11b", "SZ_NA"],
        "AHr12b": ["OP_AHr12b", "SZ_NA"],
        "CHr13b": ["OP_CHr13b", "SZ_NA"],
        "DHr14b": ["OP_DHr14b", "SZ_NA"],
        "BHr15b": ["OP_BHr15b", "SZ_NA"],
        "rAXr8":  ["OP_rAXr8",  "SZ_NA"],
        "rCXr9":  ["OP_rCXr9",  "SZ_NA"],
        "rDXr10": ["OP_rDXr10", "SZ_NA"],
        "rBXr11": ["OP_rBXr11", "SZ_NA"],
        "rSPr12": ["OP_rSPr12", "SZ_NA"],
        "rBPr13": ["OP_rBPr13", "SZ_NA"],
        "rSIr14": ["OP_rSIr14", "SZ_NA"],
        "rDIr15": ["OP_rDIr15", "SZ_NA"],
        "jWP":    ["OP_J",      "SZ_WP"],
        "jDP":    ["OP_J",      "SZ_DP"],
    }

    #
    # opcode prefix dictionary: prefix name -> C prefix flag written to the itab
    #
    PrefixDict = {
        "aso":   "P_aso",
        "oso":   "P_oso",
        "rexw":  "P_rexw",
        "rexb":  "P_rexb",
        "rexx":  "P_rexx",
        "rexr":  "P_rexr",
        "seg":   "P_seg",
        "inv64": "P_inv64",
        "def64": "P_def64",
        "depM":  "P_depM",
        "cast1": "P_c1",
        "cast2": "P_c2",
        "cast3": "P_c3",
        "cast":  "P_cast",
        "sext":  "P_sext",
    }

    # itab slot 0 is reserved for the invalid instruction
    InvalidEntryIdx = 0
    InvalidEntry = {
        'type': 'invalid',
        'mnemonic': 'invalid',
        'operands': '',
        'prefixes': '',
        'meta': '',
    }

    # Class-level defaults only; __init__ gives every instance its own copies
    # so that two generators never share (and corrupt) each other's tables.
    Itab = []        # instruction table
    ItabIdx = 1      # instruction table index
    GtabIdx = 0      # group table index
    GtabMeta = []

    ItabLookup = {}

    # mnemonics added to MnemonicsTable at construction time
    MnemonicAliases = ("invalid", "3dnow", "none", "db", "pause")

    def __init__(self, outputDir):
        """Set up empty tables; generated files are written into outputDir."""
        # first itab entry (0) is Invalid
        self.Itab = [self.InvalidEntry]
        self.ItabIdx = 1
        self.GtabIdx = 0
        self.GtabMeta = []
        self.ItabLookup = {}
        # MnemonicsTable is not defined in this class (presumably a list
        # shared through the base class), so add each alias at most once:
        # a duplicate would end up as a duplicate C enumerator.
        for alias in self.MnemonicAliases:
            if alias not in self.MnemonicsTable:
                self.MnemonicsTable.append(alias)
        self.outputDir = outputDir

    def toGroupId(self, id):
        """Return id with bit 15 set, marking it as a lookup-table reference."""
        return 0x8000 | id

    def genLookupTable(self, table, scope=''):
        """Write table, and recursively every table below it, to self.ItabC.

        Each table becomes a C array ``const uint16_t ud_itab__<n>[]`` whose
        slots hold either an index into the instruction table or GROUP(<n>)
        for a nested table.  Nested tables are emitted with scope 'static';
        a non-empty scope is written in front of the array declaration.

        Returns the group id (see toGroupId) of the table just written.
        """
        slots = []
        tabIdx = self.GtabIdx
        self.GtabIdx += 1
        self.GtabMeta.append({'type': table['type'], 'meta': table['meta']})

        entries = table['entries']
        for slot in range(self.sizeOfTable(table['type'])):
            # entries are keyed by the two-digit lowercase hex slot number
            entry = entries.get("%02x" % slot, self.InvalidEntry)
            value = self.InvalidEntryIdx

            if entry['type'] == 'insn':
                # leaf node (insn): take the next free itab index
                value = self.ItabIdx
                self.ItabIdx += 1
                self.Itab.append(entry)
            elif entry['type'] != 'invalid':
                # nested table; it is written out before its parent
                value = self.genLookupTable(entry, 'static')

            slots.append(value)

        name = "ud_itab__%s" % tabIdx
        self.ItabLookup[tabIdx] = name

        out = self.ItabC
        out.write("\n")
        if scope:
            out.write(scope + ' ')
        out.write("const uint16_t %s[] = {\n" % name)
        for pos, value in enumerate(slots):
            # four slots per output line, each line prefixed by its offset
            if pos % 4 == 0:
                if pos > 0:
                    out.write("\n")
                out.write(" /* %2x */" % pos)
            if value >= 0x8000:
                out.write("%12s," % ("GROUP(%d)" % (value & ~0x8000)))
            else:
                out.write("%12d," % value)
        out.write("\n")
        out.write("};\n")

        return self.toGroupId(tabIdx)

    def genLookupTableList(self):
        """Write ud_lookup_table_list: one { array, type, meta } row per table."""
        out = self.ItabC
        out.write("\n\n")
        out.write("struct ud_lookup_table_list_entry ud_lookup_table_list[] = {\n")
        for i, meta in enumerate(self.GtabMeta):
            f0 = self.ItabLookup[i] + ","
            f1 = self.nameOfTable(meta['type']) + ","
            f2 = "\"%s\"" % meta['meta']
            out.write(" /* %03d */ { %s %s %s },\n" % (i, f0, f1, f2))
        out.write("};")

    def genInsnTable(self):
        """Write the ud_itab[] array and the ud_mnemonics_str[] string table.

        An unknown operand is reported on stdout and still emitted as
        O_<operand>.  An unknown prefix is reported on stdout and then
        raises KeyError, because there is no C flag to emit for it.
        """
        out = self.ItabC
        out.write("struct ud_itab_entry ud_itab[] = {\n")
        for idx, e in enumerate(self.Itab):
            opr_c = ["O_NONE", "O_NONE", "O_NONE"]
            for i, operand in enumerate(e['operands']):
                if operand not in self.OperandDict:
                    sys.stdout.write(
                        "error: invalid operand declaration: %s\n\n" % operand)
                opr_c[i] = "O_" + operand
            opr = "%s %s %s" % (opr_c[0] + ",", opr_c[1] + ",", opr_c[2])

            pfx_c = []
            for p in e['prefixes']:
                if p not in self.PrefixDict:
                    # report the offending prefix itself before the lookup
                    # below fails
                    sys.stdout.write(
                        "error: invalid prefix specification: %s \n\n" % p)
                pfx_c.append(self.PrefixDict[p])
            if not pfx_c:
                pfx_c.append("P_none")
            pfx = "|".join(pfx_c)

            out.write(" /* %04d */ { UD_I%s %s, %s },\n"
                      % (idx, e['mnemonic'] + ',', opr, pfx))
        out.write("};\n")

        out.write("\n\n")
        out.write("const char * ud_mnemonics_str[] = {\n")
        out.write(",\n ".join(["\"%s\"" % m for m in self.MnemonicsTable]))
        out.write("\n};\n")

    def genItabH(self):
        """Write udis86_itab.h: table-type and mnemonic enums, O_* macros."""
        self.ItabH = open(os.path.join(self.outputDir, "udis86_itab.h"), "w")
        try:
            out = self.ItabH

            # Generate Table Type Enumeration
            out.write("#ifndef UD_ITAB_H\n")
            out.write("#define UD_ITAB_H\n\n")

            # table type enumeration
            out.write("/* ud_table_type -- lookup table types (see lookup.c) */\n")
            out.write("enum ud_table_type {\n ")
            enum = [info['name'] for info in self.TableInfo.values()]
            out.write(",\n ".join(enum))
            out.write("\n};\n\n")

            # mnemonic enumeration
            out.write("/* ud_mnemonic -- mnemonic constants */\n")
            enum = "enum ud_mnemonic_code {\n "
            enum += ",\n ".join(["UD_I%s" % m for m in self.MnemonicsTable])
            enum += "\n} UD_ATTR_PACKED;\n"
            out.write(enum)
            out.write("\n")

            out.write("\n/* itab entry operand definitions */\n")
            # sorted() instead of keys().sort(): also works where keys() is
            # a view rather than a list
            for o in sorted(self.OperandDict):
                out.write("#define O_%-7s { %-12s %-8s }\n" %
                          (o, self.OperandDict[o][0] + ",", self.OperandDict[o][1]))
            out.write("\n\n")

            out.write("extern const char * ud_mnemonics_str[];\n")

            out.write("#define GROUP(n) (0x8000 | (n))")

            out.write("\n#endif /* UD_ITAB_H */\n")
        finally:
            # do not leak the handle when generation fails half-way
            self.ItabH.close()

    def genItabC(self):
        """Write udis86_itab.c: lookup tables, table list, instruction table."""
        self.ItabC = open(os.path.join(self.outputDir, "udis86_itab.c"), "w")
        try:
            self.ItabC.write("/* itab.c -- generated by itab.py, do no edit")
            self.ItabC.write(" */\n")
            self.ItabC.write("#include \"udis86_decode.h\"\n\n")

            # genLookupTable fills self.Itab and self.GtabMeta, which the
            # two generators after it read, so the order matters.
            self.genLookupTable(self.OpcodeTable0)
            self.genLookupTableList()
            self.genInsnTable()
        finally:
            # do not leak the handle when generation fails half-way
            self.ItabC.close()

    def genItab(self):
        """Generate both output files, the C source first and then the header."""
        self.genItabC()
        self.genItabH()
| |
def main():
    """Command-line entry point.

    Parses the opcode table named by the first positional argument, feeding
    every instruction definition to the generator, then writes the itab
    sources into --outputDir (default: the current directory).  Exits with
    a usage error when no opcode table path is given.
    """
    parser = OptionParser(usage="usage: %prog [--outputDir DIR] OPTABLE_XML")
    parser.add_option("--outputDir", dest="outputDir", default="")
    options, args = parser.parse_args()
    if not args:
        # fail with a usage message instead of an IndexError on args[0]
        parser.error("missing path to the opcode table definition")

    generator = UdItabGenerator(os.path.normpath(options.outputDir))
    optableXmlParser = ud_optable.UdOptableXmlParser()
    optableXmlParser.parse(args[0], generator.addInsnDef)

    generator.genItab()


if __name__ == '__main__':
    main()