From 49c15b324c9fcda0bc2f22809c45e2db04fb6152 Mon Sep 17 00:00:00 2001 From: Darryl Pogue Date: Tue, 31 Aug 2010 23:17:38 -0700 Subject: [PATCH] Updates to make progress. Progress is good. --- ASTNode.cpp | 8 ++++--- ASTNode.h | 37 ++++++++++++++++++++++++++---- ASTree.cpp | 56 +++++++++++++++++++++++++++++++++++++++++----- PythonBytecode.txt | 1 + bytecode.cpp | 23 +++++++++++++++++++ bytecode.h | 3 +++ module.cpp | 6 +++++ module.h | 1 + pycdas.cpp | 1 + 9 files changed, 124 insertions(+), 12 deletions(-) diff --git a/ASTNode.cpp b/ASTNode.cpp index c8dc006..c3dbe5f 100644 --- a/ASTNode.cpp +++ b/ASTNode.cpp @@ -21,7 +21,7 @@ void ASTNodeList::removeFirst() const char* ASTUnary::op_str() const { static const char* s_op_strings[] = { - "+", "-", "~", "not" + "+", "-", "~", "not ", "`" }; return s_op_strings[op()]; } @@ -32,7 +32,9 @@ const char* ASTBinary::op_str() const { static const char* s_op_strings[] = { ".", " ** ", " * ", " / ", " // ", " % ", " + ", " - ", - " << ", " >> ", " & ", " ^ ", " | ", " and ", " or " + " << ", " >> ", " & ", " ^ ", " | ", " and ", " or ", + " += ", " -= ", " *= ", " /= ", " %= ", " **= ", " <<= ", + " >>= ", " &= ", " ^= ", " |= ", " //= ", "[" }; return s_op_strings[op()]; } @@ -42,7 +44,7 @@ const char* ASTBinary::op_str() const const char* ASTCompare::op_str() const { static const char* s_cmp_strings[] = { - "<", "<=", "==", "!=", ">", ">=", "in", "not in", "is", "is not", + " < ", " <= ", " == ", " != ", " > ", " >= ", " in ", " not in ", " is ", " is not ", "", "" }; return s_cmp_strings[op()]; diff --git a/ASTNode.h b/ASTNode.h index 93ec330..1486eb9 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -12,10 +12,13 @@ public: NODE_INVALID, NODE_NODELIST, NODE_OBJECT, NODE_UNARY, NODE_BINARY, NODE_COMPARE, NODE_STORE, NODE_RETURN, NODE_NAME, NODE_DELETE, NODE_FUNCTION, NODE_CLASS, NODE_CALL, NODE_IMPORT, NODE_TUPLE, - NODE_LIST, NODE_MAP, NODE_SUBSCR, NODE_PRINT, + NODE_LIST, NODE_MAP, NODE_SUBSCR, NODE_PRINT, NODE_JUMP, // Empty nodes - NODE_PASS, NODE_LOCALS + NODE_PASS, NODE_LOCALS, + + //Hack to unindent + NODE_POP_HACK }; ASTNode(int type = NODE_INVALID) : m_refs(0), m_type(type) { } @@ -68,7 +71,7 @@ private: class ASTUnary : public ASTNode { public: enum UnOp { - UN_POSITIVE, UN_NEGATIVE, UN_INVERT, UN_NOT + UN_POSITIVE, UN_NEGATIVE, UN_INVERT, UN_NOT, UN_CONVERT }; ASTUnary(PycRef operand, int op) @@ -91,7 +94,10 @@ public: enum BinOp { BIN_ATTR, BIN_POWER, BIN_MULTIPLY, BIN_DIVIDE, BIN_FLOOR, BIN_MODULO, BIN_ADD, BIN_SUBTRACT, BIN_LSHIFT, BIN_RSHIFT, BIN_AND, BIN_XOR, - BIN_OR, BIN_LOG_AND, BIN_LOG_OR + BIN_OR, BIN_LOG_AND, BIN_LOG_OR, BIN_IP_ADD, BIN_IP_SUBTRACT, + BIN_IP_MULTIPLY, BIN_IP_DIVIDE, BIN_IP_MODULO, BIN_IP_POWER, + BIN_IP_LSHIFT, BIN_IP_RSHIFT, BIN_IP_AND, BIN_IP_XOR, BIN_IP_OR, + BIN_IP_FLOOR, BIN_SUBSCR }; ASTBinary(PycRef left, PycRef right, int op, @@ -311,4 +317,27 @@ private: PycRef m_value; }; + +class ASTJump : public ASTNode { +public: + enum Condition { JUMP, JMP_FALSE, JMP_TRUE }; + + ASTJump(int dest, Condition jtype, PycRef cond) + : ASTNode(NODE_JUMP), m_dest(dest), m_jtype(jtype), m_cond(cond) {} + + int dest() const { return m_dest; } + Condition jtype() const { return m_jtype; } + PycRef cond() const { return m_cond; } + +private: + int m_dest; + Condition m_jtype; + PycRef m_cond; +}; + +class ASTPopHack : public ASTNode { +public: + ASTPopHack() : ASTNode(NODE_POP_HACK); +}; + #endif diff --git a/ASTree.cpp b/ASTree.cpp index 8f9f47b..8dfb959 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -148,6 +148,17 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(new ASTBinary(left, right, ASTBinary::BIN_RSHIFT)); } break; + case (PY_1000 | Py1k::BINARY_SUBSCR): + case (PY_2000 | Py2k::BINARY_SUBSCR): + case (PY_3000 | Py3k::BINARY_SUBSCR): + { + PycRef right = stack.top(); + stack.pop(); + PycRef left = stack.top(); + stack.pop(); + stack.push(new ASTBinary(left, right, ASTBinary::BIN_SUBSCR)); + } + break; case (PY_1000 | Py1k::BINARY_SUBTRACT): case (PY_2000 | Py2k::BINARY_SUBTRACT): case (PY_3000 | Py3k::BINARY_SUBTRACT): @@ -297,6 +308,28 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) lines.push_back(new ASTStore(import, Node_NULL)); } break; + case (PY_2000 | Py2k::INPLACE_ADD): + case (PY_3000 | Py3k::INPLACE_ADD): + { + PycRef right = stack.top(); + stack.pop(); + PycRef src = stack.top(); + stack.pop(); + /* This is a problem, so fake it with a = a + b syntax */ + stack.push(new ASTBinary(src, right, ASTBinary::BIN_AND)); + } + break; + case (PY_2000 | Py2k::INPLACE_SUBTRACT): + case (PY_3000 | Py3k::INPLACE_SUBTRACT): + { + PycRef right = stack.top(); + stack.pop(); + PycRef src = stack.top(); + stack.pop(); + /* This is a problem, so fake it with a = a - b syntax */ + stack.push(new ASTBinary(src, right, ASTBinary::BIN_SUBTRACT)); + } + break; case (PY_1000 | Py1k::LOAD_ATTR): case (PY_2000 | Py2k::LOAD_ATTR): case (PY_3000 | Py3k::LOAD_ATTR): @@ -355,7 +388,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) { PycRef value = stack.top(); stack.pop(); - if (value->type() == ASTNode::NODE_CALL) + if (value->type() == ASTNode::NODE_CALL || value->type() == ASTNode::NODE_JUMP) lines.push_back(value); } break; @@ -607,6 +640,7 @@ static void end_line() printf("\n"); } +int cur_indent = 0; void print_src(PycRef node, PycModule* mod, int indent) { switch (node->type()) { @@ -617,6 +651,15 @@ void print_src(PycRef node, PycModule* mod, int indent) print_ordered(node, bin->left(), mod, indent); printf("%s", bin->op_str()); print_ordered(node, bin->right(), mod, indent); + if (bin->op() == ASTBinary::BIN_SUBSCR) + printf("]"); + } + break; + case ASTNode::NODE_UNARY: + { + PycRef un = node.cast(); + printf("%s", un->op_str()); + print_ordered(node, un->operand(), mod, indent); } break; case ASTNode::NODE_CALL: @@ -647,7 +690,7 @@ void print_src(PycRef node, PycModule* mod, int indent) for (ASTList::value_t::const_iterator b = values.begin(); b != values.end(); ++b) { if (first) printf("\n"); else printf(",\n"); - start_line(indent + 1); + start_line(cur_indent + indent + 1); print_src(*b, mod, indent + 1); first = false; } @@ -662,7 +705,7 @@ void print_src(PycRef node, PycModule* mod, int indent) for (ASTMap::map_t::const_iterator b = values.begin(); b != values.end(); ++b) { if (first) printf("\n"); else printf(",\n"); - start_line(indent + 1); + start_line(cur_indent + indent + 1); print_src(b->first, mod, indent + 1); printf(": "); print_src(b->second, mod, indent + 1); @@ -678,7 +721,7 @@ void print_src(PycRef node, PycModule* mod, int indent) { ASTNodeList::list_t lines = node.cast()->nodes(); for (ASTNodeList::list_t::const_iterator ln = lines.begin(); ln != lines.end(); ++ln) { - start_line(indent); + start_line(cur_indent + indent); print_src(*ln, mod, indent); end_line(); } @@ -717,6 +760,7 @@ void print_src(PycRef node, PycModule* mod, int indent) PycRef src = node.cast()->src(); PycRef dest = node.cast()->dest(); if (src->type() == ASTNode::NODE_FUNCTION) { + cur_indent = 0; printf("\n"); start_line(indent); printf("def "); @@ -738,7 +782,7 @@ void print_src(PycRef node, PycModule* mod, int indent) print_src(code, mod, indent + 1); } else if (src->type() == ASTNode::NODE_CLASS) { printf("\n"); - start_line(indent); + start_line(cur_indent + indent); printf("class "); print_src(dest, mod, indent); PycRef bases = src.cast()->bases().cast(); @@ -851,11 +895,13 @@ void decompyle(PycRef code, PycModule* mod, int indent) clean->append(new ASTNode(ASTNode::NODE_PASS)); inPrint = false; + cur_indent = 0; bool part1clean = cleanBuild; print_src(source, mod, indent); if (!cleanBuild || !part1clean) { start_line(indent); printf("# WARNING: Decompyle incomplete\n"); + cur_indent = 0; } } diff --git a/PythonBytecode.txt b/PythonBytecode.txt index 16425b6..ea87e78 100644 --- a/PythonBytecode.txt +++ b/PythonBytecode.txt @@ -6,6 +6,7 @@ Python MAGIC Python MAGIC Python MAGIC 1.4 0x0A0D1704 2.4 0x0A0DF26D 1.5 0x0A0D4E99 2.5 0x0A0DF2B3 1.6 0x0A0DC4FC 2.6 0x0A0DF2D1 + 2.7 0x0A0DF303 1.0 1.1 1.2 1.3 1.4 1.5 1.6 diff --git a/bytecode.cpp b/bytecode.cpp index 312012c..1cdf9e6 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -170,6 +170,12 @@ bool Py1k::IsVarNameArg(int opcode) (opcode == Py1k::STORE_FAST); } +bool Py1k::IsJumpOffsetArg(int opcode) +{ + return (opcode == Py1k::JUMP_FORWARD) || (opcode == Py1k::JUMP_IF_FALSE) || + (opcode == Py1k::JUMP_IF_TRUE); +} + bool Py2k::IsConstArg(int opcode) { return (opcode == Py2k::LOAD_CONST); @@ -197,6 +203,12 @@ bool Py2k::IsCellArg(int opcode) (opcode == Py2k::STORE_DEREF); } +bool Py2k::IsJumpOffsetArg(int opcode) +{ + return (opcode == Py2k::JUMP_FORWARD) || (opcode == Py2k::JUMP_IF_FALSE) || + (opcode == Py2k::JUMP_IF_TRUE); +} + bool Py3k::IsConstArg(int opcode) { return (opcode == Py3k::LOAD_CONST); @@ -224,6 +236,13 @@ bool Py3k::IsCellArg(int opcode) (opcode == Py3k::STORE_DEREF); } +bool Py3k::IsJumpOffsetArg(int opcode) +{ + return (opcode == Py3k::JUMP_FORWARD) || (opcode == Py3k::JUMP_IF_FALSE) || + (opcode == Py3k::JUMP_IF_TRUE) || (opcode == Py3k::POP_JUMP_IF_FALSE) || + (opcode == Py3k::POP_JUMP_IF_TRUE); +} + void print_const(PycRef obj, PycModule* mod) { @@ -405,6 +424,10 @@ void bc_disasm(PycRef code, PycModule* mod, int indent) (mod->majorVer() == 3 && Py3k::IsCellArg(opcode))) { printf("%d: ", operand); print_const(code->getConst(operand), mod); + } else if ((mod->majorVer() == 1 && Py1k::IsJumpOffsetArg(opcode)) || + (mod->majorVer() == 2 && Py2k::IsJumpOffsetArg(opcode)) || + (mod->majorVer() == 3 && Py3k::IsJumpOffsetArg(opcode))) { + printf("%d (to %d)", operand, pos+operand); } else { printf("%d", operand); } diff --git a/bytecode.h b/bytecode.h index 9d9d86a..2d61b38 100644 --- a/bytecode.h +++ b/bytecode.h @@ -39,6 +39,7 @@ extern const char* OpcodeNames[256]; bool IsConstArg(int opcode); bool IsNameArg(int opcode); bool IsVarNameArg(int opcode); +bool IsJumpOffsetArg(int opcode); } @@ -84,6 +85,7 @@ bool IsConstArg(int opcode); bool IsNameArg(int opcode); bool IsVarNameArg(int opcode); bool IsCellArg(int opcode); +bool IsJumpOffsetArg(int opcode); } @@ -127,6 +129,7 @@ bool IsConstArg(int opcode); bool IsNameArg(int opcode); bool IsVarNameArg(int opcode); bool IsCellArg(int opcode); +bool IsJumpOffsetArg(int opcode); } diff --git a/module.cpp b/module.cpp index 9f09f45..79a0d2c 100644 --- a/module.cpp +++ b/module.cpp @@ -89,6 +89,12 @@ void PycModule::setVersion(unsigned int magic) m_maj = 2; m_min = 6; break; + case MAGIC_2_7+1: + m_unicode = true; + /* Fall through */ + case MAGIC_2_7: + m_maj = 2; + m_min = 7; /* 3.0 and above are always unicode */ case MAGIC_3_0+1: diff --git a/module.h b/module.h index e063d7a..cfe0f26 100644 --- a/module.h +++ b/module.h @@ -19,6 +19,7 @@ enum PycMagic { MAGIC_2_4 = 0x0A0DF26D, MAGIC_2_5 = 0x0A0DF2B3, MAGIC_2_6 = 0x0A0DF2D1, + MAGIC_2_7 = 0x0A0DF303, MAGIC_3_0 = 0x0A0D0C3A, MAGIC_3_1 = 0x0A0D0C4E, diff --git a/pycdas.cpp b/pycdas.cpp index 79f1725..cec2f9f 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -1,4 +1,5 @@ #include +#include #include #include "module.h" #include "bytecode.h"