From b89ae8b2ac7de5e0da769ce70375f7772bbe31b5 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Sun, 26 Jul 2009 10:07:13 +0000 Subject: [PATCH] A little bit of code support now --- ASTNode.cpp | 13 +++ ASTNode.h | 157 ++++++++++++++++++++++++++++++++++++ ASTree.cpp | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++ ASTree.h | 19 +++++ FastStack.h | 47 +++++++++++ Makefile | 18 ++++- bytecode.cpp | 55 +++++++------ bytecode.h | 3 + code.h | 17 ++++ pycdc.cpp | 21 +++++ sequence.cpp | 9 +++ sequence.h | 1 + 12 files changed, 556 insertions(+), 28 deletions(-) create mode 100644 ASTNode.cpp create mode 100644 ASTNode.h create mode 100644 ASTree.cpp create mode 100644 ASTree.h create mode 100644 FastStack.h create mode 100644 pycdc.cpp diff --git a/ASTNode.cpp b/ASTNode.cpp new file mode 100644 index 0000000..6bd7d1b --- /dev/null +++ b/ASTNode.cpp @@ -0,0 +1,13 @@ +#include "ASTNode.h" + +PycRef Node_NULL = (ASTNode*)0; + +/* ASTCompare */ +const char* ASTCompare::op_str() const +{ + static const char* s_cmp_strings[] = { + "<", "<=", "==", "!=", ">", ">=", "in", "not in", "is", "is not", + "", "" + }; + return s_cmp_strings[m_op]; +} diff --git a/ASTNode.h b/ASTNode.h new file mode 100644 index 0000000..74107fd --- /dev/null +++ b/ASTNode.h @@ -0,0 +1,157 @@ +#ifndef _PYC_ASTNODE_H +#define _PYC_ASTNODE_H + +#include "module.h" +#include + +/* Similar interface to PycObject, so PycRef can work on it... * + * However, this does *NOT* mean the two are interchangeable! */ +class ASTNode { +public: + enum Type { + NODE_INVALID, NODE_LIST, NODE_OBJECT, NODE_UNARY, NODE_BINARY, + NODE_COMPARE, NODE_STORE, NODE_RETURN, NODE_NAME, NODE_DELETE + }; + + ASTNode(int type = NODE_INVALID) : m_refs(0), m_type(type) { } + virtual ~ASTNode() { } + + int type() const { return (this) ? m_type : NODE_INVALID; } + +private: + int m_refs; + int m_type; + +public: + void addRef() { if (this) ++m_refs; } + void delRef() { if (this && --m_refs == 0) delete this; } +}; + +/* A NULL node for comparison */ +extern PycRef Node_NULL; + + +class ASTNodeList : public ASTNode { +public: + typedef std::list > list_t; + + ASTNodeList(list_t nodes) + : ASTNode(NODE_LIST), m_nodes(nodes) { } + + list_t nodes() const { return m_nodes; } + +private: + list_t m_nodes; +}; + + +class ASTObject : public ASTNode { +public: + ASTObject(PycRef obj) + : ASTNode(NODE_OBJECT), m_obj(obj) { } + + PycRef object() const { return m_obj; } + +private: + PycRef m_obj; +}; + + +class ASTUnary : public ASTNode { +public: + ASTUnary(PycRef operand) + : ASTNode(NODE_UNARY), m_operand(operand) { } + + PycRef operand() const { return m_operand; } + +private: + PycRef m_operand; +}; + + +class ASTBinary : public ASTNode { +public: + ASTBinary(PycRef left, PycRef right, int type = NODE_BINARY) + : ASTNode(type), m_left(left), m_right(right) { } + + PycRef left() const { return m_left; } + PycRef right() const { return m_right; } + +private: + PycRef m_left; + PycRef m_right; +}; + + +class ASTCompare : public ASTBinary { +public: + enum CompareOp { + CMP_LESS, CMP_LESS_EQUAL, CMP_EQUAL, CMP_NOT_EQUAL, CMP_GREATER, + CMP_GREATER_EQUAL, CMP_IN, CMP_NOT_IN, CMP_IS, CMP_IS_NOT, + CMP_EXCEPTION, CMP_BAD + }; + + ASTCompare(PycRef left, PycRef right, CompareOp op) + : ASTBinary(left, right, NODE_COMPARE), m_op(op) { } + + CompareOp op() const { return m_op; } + const char* op_str() const; + +private: + CompareOp m_op; +}; + + +class ASTStore : public ASTNode { +public: + ASTStore(PycRef src, PycRef dest) + : ASTNode(NODE_STORE), m_src(src), m_dest(dest) { } + + PycRef src() const { return m_src; } + PycRef dest() const { return m_dest; } + +private: + PycRef m_src; + PycRef m_dest; +}; + + +class ASTReturn : public ASTNode { +public: + ASTReturn(PycRef value) + : ASTNode(NODE_RETURN), m_value(value) { } + + PycRef value() const { return m_value; } + +private: + PycRef m_value; +}; + + +class ASTName : public ASTNode { +public: + typedef std::list > name_t; + + ASTName(PycRef name) + : ASTNode(NODE_NAME) { m_name.push_back(name); } + + name_t name() const { return m_name; } + void add(PycRef name) { m_name.push_back(name); } + +private: + name_t m_name; +}; + + +class ASTDelete : public ASTNode { +public: + ASTDelete(PycRef value) + : ASTNode(NODE_DELETE), m_value(value) { } + + PycRef value() const { return m_value; } + +private: + PycRef m_value; +}; + +#endif diff --git a/ASTree.cpp b/ASTree.cpp new file mode 100644 index 0000000..61ecf5a --- /dev/null +++ b/ASTree.cpp @@ -0,0 +1,224 @@ +#include "ASTree.h" +#include "FastStack.h" +#include "bytecode.h" + +// These are used to avoid writing code 3 times for each of +// the different python generations +#define PY_1000 0x1000 +#define PY_2000 0x2000 +#define PY_3000 0x3000 + +PycRef BuildFromCode(PycRef code, PycModule* mod) +{ + PycBuffer source(code->code()->value(), code->code()->length()); + ASTNodeList::list_t lines; + + FastStack stack((mod->majorVer() == 1) ? 20 : code->stackSize()); + stackhist_t stack_hist; + + int opcode, operand; + int pos = 0; + + int opadd = 0; + if (mod->majorVer() == 1) + opadd = PY_1000; + else if (mod->majorVer() == 2) + opadd = PY_2000; + else if (mod->majorVer() == 3) + opadd = PY_3000; + + while (!source.atEof()) { + bc_next(source, mod, opcode, operand, pos); + opcode |= opadd; + + switch (opcode) { + //case Py2k::STOP_CODE: + //case Py2k::POP_TOP: + //case Py2k::ROT_TWO: + //case Py2k::ROT_THREE: + //case Py2k::DUP_TOP: + //case Py2k::ROT_FOUR: + //case Py2k::NOP: + //case Py2k::UNARY_POSITIVE: + //case Py2k::UNARY_NEGATIVE: + //case Py2k::UNARY_NOT: + //case Py2k::UNARY_CONVERT: + //case Py2k::UNARY_INVERT: + //case Py2k::LIST_APPEND: + //case Py2k::BINARY_POWER: + //case Py2k::BINARY_MULTIPLY: + //case Py2k::BINARY_DIVIDE: + //case Py2k::BINARY_MODULO: + //case Py2k::BINARY_ADD: + //case Py2k::BINARY_SUBTRACT: + //case Py2k::BINARY_SUBSCR: + //case Py2k::BINARY_FLOOR_DIVIDE: + //case Py2k::BINARY_TRUE_DIVIDE: + //case Py2k::INPLACE_FLOOR_DIVIDE: + //case Py2k::INPLACE_TRUE_DIVIDE: + //case Py2k::SLICE_0: + //case Py2k::SLICE_1: + //case Py2k::SLICE_2: + //case Py2k::SLICE_3: + //case Py2k::STORE_SLICE_0: + //case Py2k::STORE_SLICE_1: + //case Py2k::STORE_SLICE_2: + //case Py2k::STORE_SLICE_3: + //case Py2k::DELETE_SLICE_0: + //case Py2k::DELETE_SLICE_1: + //case Py2k::DELETE_SLICE_2: + //case Py2k::DELETE_SLICE_3: + //case Py2k::STORE_MAP: + //case Py2k::INPLACE_ADD: + //case Py2k::INPLACE_SUBTRACT: + //case Py2k::INPLACE_MULTIPLY: + //case Py2k::INPLACE_DIVIDE: + //case Py2k::INPLACE_MODULO: + //case Py2k::STORE_SUBSCR: + //case Py2k::DELETE_SUBSCR: + //case Py2k::BINARY_LSHIFT: + //case Py2k::BINARY_RSHIFT: + //case Py2k::BINARY_AND: + //case Py2k::BINARY_XOR: + //case Py2k::BINARY_OR: + //case Py2k::INPLACE_POWER: + //case Py2k::GET_ITER: + //case Py2k::PRINT_EXPR: + //case Py2k::PRINT_ITEM: + //case Py2k::PRINT_NEWLINE: + //case Py2k::PRINT_ITEM_TO: + //case Py2k::PRINT_NEWLINE_TO: + //case Py2k::INPLACE_LSHIFT: + //case Py2k::INPLACE_RSHIFT: + //case Py2k::INPLACE_AND: + //case Py2k::INPLACE_XOR: + //case Py2k::INPLACE_OR: + //case Py2k::BREAK_LOOP: + //case Py2k::WITH_CLEANUP: + //case Py2k::LOAD_LOCALS: + //case Py2k::RETURN_VALUE: + //case Py2k::IMPORT_STAR: + //case Py2k::EXEC_STMT: + //case Py2k::YIELD_VALUE: + //case Py2k::POP_BLOCK: + //case Py2k::END_FINALLY: + //case Py2k::BUILD_CLASS: + case (PY_1000 | Py1k::STORE_NAME): + case (PY_2000 | Py2k::STORE_NAME): + case (PY_3000 | Py3k::STORE_NAME): + { + PycRef value = stack.top(); + PycRef name = new ASTName(code->getName(operand)); + stack.pop(); + lines.push_back(new ASTStore(value, name)); + } + break; + //case Py2k::DELETE_NAME: + //case Py2k::UNPACK_SEQUENCE: + //case Py2k::FOR_ITER: + //case Py2k::STORE_ATTR: + //case Py2k::DELETE_ATTR: + //case Py2k::STORE_GLOBAL: + //case Py2k::DELETE_GLOBAL: + //case Py2k::DUP_TOPX: + case (PY_1000 | Py1k::LOAD_CONST): + case (PY_2000 | Py2k::LOAD_CONST): + case (PY_3000 | Py3k::LOAD_CONST): + stack.push(new ASTObject(code->getConst(operand))); + break; + //case Py2k::LOAD_NAME: + //case Py2k::BUILD_TUPLE: + //case Py2k::BUILD_LIST: + //case Py2k::BUILD_MAP: + //case Py2k::LOAD_ATTR: + //case Py2k::COMPARE_OP: + //case Py2k::IMPORT_NAME: + //case Py2k::IMPORT_FROM: + //case Py2k::JUMP_FORWARD: + //case Py2k::JUMP_IF_FALSE: + //case Py2k::JUMP_IF_TRUE: + //case Py2k::JUMP_ABSOLUTE: + //case Py2k::FOR_LOOP: + //case Py2k::LOAD_GLOBAL: + //case Py2k::CONTINUE_LOOP: + //case Py2k::SETUP_LOOP: + //case Py2k::SETUP_EXCEPT: + //case Py2k::SETUP_FINALLY: + //case Py2k::LOAD_FAST: + //case Py2k::STORE_FAST: + //case Py2k::DELETE_FAST: + //case Py2k::SET_LINENO: + //case Py2k::RAISE_VARARGS: + //case Py2k::CALL_FUNCTION: + //case Py2k::MAKE_FUNCTION: + //case Py2k::BUILD_SLICE: + //case Py2k::MAKE_CLOSURE: + //case Py2k::LOAD_CLOSURE: + //case Py2k::LOAD_DEREF: + //case Py2k::STORE_DEREF: + //case Py2k::CALL_FUNCTION_VAR: + //case Py2k::CALL_FUNCTION_KW: + //case Py2k::CALL_FUNCTION_VAR_KW: + //case Py2k::EXTENDED_ARG: + default: + if (mod->majorVer() == 1) + fprintf(stderr, "Unsupported opcode: %s\n", Py1k::OpcodeNames[opcode & 0xFF]); + else if (mod->majorVer() == 2) + fprintf(stderr, "Unsupported opcode: %s\n", Py2k::OpcodeNames[opcode & 0xFF]); + else if (mod->majorVer() == 3) + fprintf(stderr, "Unsupported opcode: %s\n", Py3k::OpcodeNames[opcode & 0xFF]); + return new ASTNodeList(lines); + } + } + + return new ASTNodeList(lines); +} + +static void start_indent(int indent) +{ + for (int i=0; i node, PycModule* mod, int indent = 0) +{ + switch (node->type()) { + case ASTNode::NODE_LIST: + { + ASTNodeList::list_t lines = node.cast()->nodes(); + for (ASTNodeList::list_t::iterator ln = lines.begin(); ln != lines.end(); ++ln) + print_src(*ln, mod, indent); + } + break; + case ASTNode::NODE_STORE: + { + PycRef src = node.cast()->src(); + PycRef dest = node.cast()->dest(); + start_indent(indent); + print_src(dest, mod); + printf(" = "); + print_src(src, mod); + printf("\n"); + } + break; + case ASTNode::NODE_OBJECT: + print_const(node.cast()->object(), mod); + break; + case ASTNode::NODE_NAME: + { + ASTName::name_t name = node.cast()->name(); + ASTName::name_t::iterator n = name.begin(); + printf("%s", (*n)->value()); + while (++n != name.end()) + printf(".%s", (*n)->value()); + } + break; + default: + printf("Unsupported Node type: %d\n", node->type()); + } +} + +void ASTree::printSource(PycModule* mod) const +{ + print_src(m_root, mod, 0); +} diff --git a/ASTree.h b/ASTree.h new file mode 100644 index 0000000..2b96de7 --- /dev/null +++ b/ASTree.h @@ -0,0 +1,19 @@ +#ifndef _PYC_ASTREE_H +#define _PYC_ASTREE_H + +#include "ASTNode.h" + +PycRef BuildFromCode(PycRef code, PycModule* mod); + +class ASTree { +public: + void load(PycModule* mod) + { m_root = BuildFromCode(mod->code(), mod); } + + void printSource(PycModule* mod) const; + +private: + PycRef m_root; +}; + +#endif diff --git a/FastStack.h b/FastStack.h new file mode 100644 index 0000000..3646eff --- /dev/null +++ b/FastStack.h @@ -0,0 +1,47 @@ +#ifndef _PYC_FASTSTACK_H +#define _PYC_FASTSTACK_H + +#include "ASTNode.h" +#include + +class FastStack { +public: + FastStack(int size) : m_size(size), m_ptr(-1) + { m_stack = new PycRef[m_size]; } + + FastStack(const FastStack& copy) : m_size(copy.m_size), m_ptr(copy.m_ptr) + { + m_stack = new PycRef[m_size]; + for (int i=0; i node) + { m_stack[++m_ptr] = node; } + + void pop() + { m_stack[m_ptr--] = Node_NULL; } + + PycRef top() const + { return m_stack[m_ptr]; } + + void replace(const FastStack& copy) + { + for (int i=0; i<=copy.m_ptr; i++) + m_stack[i] = copy.m_stack[i]; + for (int i=copy.m_ptr+1; i<=m_ptr; i++) + m_stack[i] = Node_NULL; + m_ptr = copy.m_ptr; + } + +private: + PycRef* m_stack; + int m_size, m_ptr; +}; + +typedef std::stack stackhist_t; + +#endif diff --git a/Makefile b/Makefile index 04ebc99..c7ae4c1 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CXX = g++ -CXXFLAGS = -g -Wall -O2 +CXXFLAGS = -g -Wall COMMON = \ out/module.o \ @@ -9,10 +9,13 @@ COMMON = \ out/numeric.o \ out/code.o \ out/sequence.o \ - out/string.o + out/string.o \ + out/ASTree.o \ + out/ASTNode.o ALL = \ - bin/pycdas + bin/pycdas \ + bin/pycdc PREFIX = /usr/local @@ -28,6 +31,9 @@ install: bin/pycdas: pycdas.cpp $(COMMON) $(CXX) $(CXXFLAGS) $(COMMON) pycdas.cpp -o $@ +bin/pycdc: pycdc.cpp $(COMMON) + $(CXX) $(CXXFLAGS) $(COMMON) pycdc.cpp -o $@ + out/module.o: module.h module.cpp $(CXX) $(CXXFLAGS) -c module.cpp -o $@ @@ -51,3 +57,9 @@ out/sequence.o: sequence.h sequence.cpp out/string.o: string.h string.cpp $(CXX) $(CXXFLAGS) -c string.cpp -o $@ + +out/ASTree.o: ASTree.h ASTree.cpp + $(CXX) $(CXXFLAGS) -c ASTree.cpp -o $@ + +out/ASTNode.o: ASTNode.h ASTNode.cpp + $(CXX) $(CXXFLAGS) -c ASTNode.cpp -o $@ diff --git a/bytecode.cpp b/bytecode.cpp index e259f69..7595caa 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -1,5 +1,4 @@ #include "bytecode.h" -#include "data.h" #include "numeric.h" const char* Py1k::OpcodeNames[256] = { @@ -226,7 +225,7 @@ bool Py3k::IsCellArg(int opcode) } -static void print_const(PycRef obj, PycModule* mod) +void print_const(PycRef obj, PycModule* mod) { switch (obj->type()) { case PycObject::TYPE_STRING: @@ -347,39 +346,45 @@ static void print_const(PycRef obj, PycModule* mod) } } +void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos) +{ + opcode = source.getByte(); + operand = 0; + bool haveExtArg = false; + pos += 1; + + if ((mod->majorVer() == 2 && opcode == Py2k::EXTENDED_ARG) || + (mod->majorVer() == 3 && opcode == Py3k::EXTENDED_ARG)) { + operand = source.get16() << 16; + opcode = source.getByte(); + haveExtArg = true; + pos += 3; + } + if (opcode >= HAVE_ARG) { + // If we have an extended arg, we want to OR the lower part, + // else we want the whole thing (in case it's negative). We use + // the bool so that values between 0x8000 and 0xFFFF can be stored + // without becoming negative + if (haveExtArg) + operand |= (source.get16() & 0xFFFF); + else + operand = source.get16(); + pos += 2; + } +} + void bc_disasm(PycRef code, PycModule* mod, int indent) { PycBuffer source(code->code()->value(), code->code()->length()); + int opcode, operand; int pos = 0; while (!source.atEof()) { for (int i=0; imajorVer() == 2 && opcode == Py2k::EXTENDED_ARG) || - (mod->majorVer() == 3 && opcode == Py3k::EXTENDED_ARG)) { - operand = source.get16() << 16; - opcode = source.getByte(); - haveExtArg = true; - pos += 3; - } - if (opcode >= HAVE_ARG) { - // If we have an extended arg, we want to OR the lower part, - // else we want the whole thing (in case it's negative). We use - // the bool so that values between 0x8000 and 0xFFFF can be stored - // without becoming negative - if (haveExtArg) - operand |= (source.get16() & 0xFFFF); - else - operand = source.get16(); - pos += 2; - } + bc_next(source, mod, opcode, operand, pos); if (mod->majorVer() == 1) { printf("%-24s", Py1k::OpcodeNames[opcode]); diff --git a/bytecode.h b/bytecode.h index 8319383..9d9d86a 100644 --- a/bytecode.h +++ b/bytecode.h @@ -1,5 +1,6 @@ #include "code.h" #include "module.h" +#include "data.h" // Opcodes >= this value have an argument after the opcode #define HAVE_ARG 90 @@ -129,4 +130,6 @@ bool IsCellArg(int opcode); } +void print_const(PycRef obj, PycModule* mod); +void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos); void bc_disasm(PycRef code, PycModule* mod, int indent); diff --git a/code.h b/code.h index 60b3b26..4519bb1 100644 --- a/code.h +++ b/code.h @@ -6,6 +6,23 @@ class PycCode : public PycObject { public: + enum CodeFlags { + CO_OPTIMIZED = 0x1, + CO_NEWLOCALS = 0x2, + CO_VARARGS = 0x4, + CO_VARKEYWORDS = 0x8, + CO_NESTED = 0x10, + CO_GENERATOR = 0x20, + CO_NOFREE = 0x40, + CO_GENERATOR_ALLOWED = 0x1000, + CO_FUTURE_DIVISION = 0x2000, + CO_FUTURE_ABSOLUTE_IMPORT = 0x4000, + CO_FUTURE_WITH_STATEMENT = 0x8000, + CO_FUTURE_PRINT_FUNCTION = 0x10000, + CO_FUTURE_UNICODE_LITERALS = 0x20000, + CO_FUTURE_BARRY_AS_BDFL = 0x40000, + }; + PycCode(int type = TYPE_CODE) : PycObject(type), m_argCount(0), m_kwOnlyArgCount(0), m_numLocals(0), m_stackSize(0), m_flags(0), m_firstLine(0) { } diff --git a/pycdc.cpp b/pycdc.cpp new file mode 100644 index 0000000..7de71a5 --- /dev/null +++ b/pycdc.cpp @@ -0,0 +1,21 @@ +#include "ASTree.h" + +int main(int argc, char* argv[]) +{ + if (argc < 2) { + fprintf(stderr, "No input file specified\n"); + return 1; + } + + PycModule mod; + mod.loadFromFile(argv[1]); + printf("# Source Generated with Decompyle++ pycdc\n"); + printf("# File: %s (Python %d.%d%s)\n", argv[1], mod.majorVer(), mod.minorVer(), + (mod.majorVer() < 3 && mod.isUnicode()) ? " Unicode" : ""); + + ASTree source; + source.load(&mod); + source.printSource(&mod); + + return 0; +} diff --git a/sequence.cpp b/sequence.cpp index 076755c..4f2ab12 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -3,6 +3,15 @@ #include "module.h" /* PycTuple */ +PycRef PycTuple::Build(const value_t& items) +{ + PycRef tupleObj = new PycTuple(); + tupleObj->m_size = items.size(); + tupleObj->m_values.resize(tupleObj->m_size); + std::copy(items.begin(), items.end(), tupleObj->m_values.begin()); + return tupleObj; +} + void PycTuple::load(PycData* stream, PycModule* mod) { m_size = stream->get32(); diff --git a/sequence.h b/sequence.h index 941320a..8ce3654 100644 --- a/sequence.h +++ b/sequence.h @@ -22,6 +22,7 @@ public: typedef std::vector > value_t; PycTuple(int type = TYPE_TUPLE) : PycSequence(type) { } + static PycRef Build(const value_t& items); bool isEqual(PycRef obj) const;