A little bit of code support now

This commit is contained in:
Michael Hansen
2009-07-26 10:07:13 +00:00
parent 98a50fd042
commit b89ae8b2ac
12 changed files with 556 additions and 28 deletions

13
ASTNode.cpp Normal file
View File

@@ -0,0 +1,13 @@
#include "ASTNode.h"
/* The shared empty reference: a PycRef wrapping a null ASTNode pointer. */
PycRef<ASTNode> Node_NULL = static_cast<ASTNode*>(0);
/* ASTCompare */
/* Returns the source-level spelling of this comparison's operator.
 *
 * The table is ordered exactly like ASTCompare::CompareOp.  A value that
 * falls outside the table (for instance an operand cast straight from
 * bytecode) yields "<BAD>" instead of reading past the end of the array. */
const char* ASTCompare::op_str() const
{
    static const char* s_cmp_strings[] = {
        "<", "<=", "==", "!=", ">", ">=", "in", "not in", "is", "is not",
        "<EXCEPTION MATCH>", "<BAD>"
    };
    static const int s_cmp_count =
        static_cast<int>(sizeof(s_cmp_strings) / sizeof(s_cmp_strings[0]));

    const int index = static_cast<int>(m_op);
    if (index < 0 || index >= s_cmp_count)
        return "<BAD>";
    return s_cmp_strings[index];
}

157
ASTNode.h Normal file
View File

@@ -0,0 +1,157 @@
#ifndef _PYC_ASTNODE_H
#define _PYC_ASTNODE_H
#include "module.h"
#include <list>
/* Base class of every AST node.
 *
 * Similar interface to PycObject (addRef/delRef), so PycRef can work on
 * it...  However, this does *NOT* mean the two are interchangeable! */
class ASTNode {
public:
    /* Tag identifying the concrete node class; reported by type(). */
    enum Type {
        NODE_INVALID, NODE_LIST, NODE_OBJECT, NODE_UNARY, NODE_BINARY,
        NODE_COMPARE, NODE_STORE, NODE_RETURN, NODE_NAME, NODE_DELETE
    };

    ASTNode(int type = NODE_INVALID) : m_refs(0), m_type(type) { }

    /* A copy is a brand new object that nothing references yet, so it
     * starts with its own zero count rather than inheriting the count of
     * the node it was copied from. */
    ASTNode(const ASTNode& other) : m_refs(0), m_type(other.m_type) { }

    /* Assignment replaces the node's value only.  References already held
     * on this object stay valid, so the count is left untouched. */
    ASTNode& operator=(const ASTNode& other)
    {
        m_type = other.m_type;
        return *this;
    }

    virtual ~ASTNode() { }

    /* Node tag, or NODE_INVALID when invoked through a null pointer.
     * NOTE(review): testing `this` for null is undefined behaviour in
     * standard C++ and optimizers may drop the check.  It is kept because
     * PycRef presumably calls these members on empty references -- confirm
     * against PycRef before removing. */
    int type() const { return (this) ? m_type : NODE_INVALID; }

private:
    int m_refs;     /* number of outstanding addRef() calls */
    int m_type;     /* one of the Type values */

public:
    /* Adds one reference (no-op on a null object, see note on type()). */
    void addRef() { if (this) ++m_refs; }

    /* Drops one reference and destroys the node when the count hits zero. */
    void delRef() { if (this && --m_refs == 0) delete this; }
};
/* A NULL node for comparison; the object itself is defined in ASTNode.cpp
 * as a PycRef holding a null ASTNode pointer. */
extern PycRef<ASTNode> Node_NULL;
/* Node of type NODE_LIST: an ordered sequence of child nodes. */
class ASTNodeList : public ASTNode {
public:
    typedef std::list<PycRef<ASTNode> > list_t;

private:
    list_t m_nodes;

public:
    /* Keeps its own copy of `nodes`. */
    ASTNodeList(list_t nodes)
        : ASTNode(NODE_LIST), m_nodes(nodes)
    { }

    /* Hands back a copy of the stored children. */
    list_t nodes() const
    {
        return m_nodes;
    }
};
/* Node of type NODE_OBJECT: wraps a reference to a PycObject. */
class ASTObject : public ASTNode {
    PycRef<PycObject> m_obj;

public:
    ASTObject(PycRef<PycObject> obj)
        : ASTNode(NODE_OBJECT), m_obj(obj)
    { }

    /* The wrapped object. */
    PycRef<PycObject> object() const
    {
        return m_obj;
    }
};
/* Node of type NODE_UNARY: holds a single operand node. */
class ASTUnary : public ASTNode {
    PycRef<ASTNode> m_operand;

public:
    ASTUnary(PycRef<ASTNode> operand)
        : ASTNode(NODE_UNARY), m_operand(operand)
    { }

    /* The operand node. */
    PycRef<ASTNode> operand() const
    {
        return m_operand;
    }
};
/* Node holding a left and a right operand.
 *
 * The tag defaults to NODE_BINARY; subclasses pass their own through `type`
 * (ASTCompare passes NODE_COMPARE). */
class ASTBinary : public ASTNode {
    PycRef<ASTNode> m_left;
    PycRef<ASTNode> m_right;

public:
    ASTBinary(PycRef<ASTNode> left, PycRef<ASTNode> right, int type = NODE_BINARY)
        : ASTNode(type), m_left(left), m_right(right)
    { }

    /* Left-hand operand. */
    PycRef<ASTNode> left() const
    {
        return m_left;
    }

    /* Right-hand operand. */
    PycRef<ASTNode> right() const
    {
        return m_right;
    }
};
/* Node of type NODE_COMPARE: a binary node that also records which
 * comparison operator joins its two operands. */
class ASTCompare : public ASTBinary {
public:
    /* The enumerator order is the index order of the string table used by
     * op_str(), so the two must be kept in step. */
    enum CompareOp {
        CMP_LESS, CMP_LESS_EQUAL, CMP_EQUAL, CMP_NOT_EQUAL, CMP_GREATER,
        CMP_GREATER_EQUAL, CMP_IN, CMP_NOT_IN, CMP_IS, CMP_IS_NOT,
        CMP_EXCEPTION, CMP_BAD
    };

private:
    CompareOp m_op;

public:
    ASTCompare(PycRef<ASTNode> left, PycRef<ASTNode> right, CompareOp op)
        : ASTBinary(left, right, NODE_COMPARE), m_op(op)
    { }

    /* The stored operator. */
    CompareOp op() const
    {
        return m_op;
    }

    /* Textual form of the stored operator (defined in ASTNode.cpp). */
    const char* op_str() const;
};
/* Node of type NODE_STORE: pairs a source node with a destination node. */
class ASTStore : public ASTNode {
    PycRef<ASTNode> m_src;
    PycRef<ASTNode> m_dest;

public:
    ASTStore(PycRef<ASTNode> src, PycRef<ASTNode> dest)
        : ASTNode(NODE_STORE), m_src(src), m_dest(dest)
    { }

    /* The value being stored. */
    PycRef<ASTNode> src() const
    {
        return m_src;
    }

    /* The target being stored into. */
    PycRef<ASTNode> dest() const
    {
        return m_dest;
    }
};
/* Node of type NODE_RETURN: holds the node for the returned value. */
class ASTReturn : public ASTNode {
    PycRef<ASTNode> m_value;

public:
    ASTReturn(PycRef<ASTNode> value)
        : ASTNode(NODE_RETURN), m_value(value)
    { }

    /* The value node. */
    PycRef<ASTNode> value() const
    {
        return m_value;
    }
};
/* Node of type NODE_NAME: a sequence of one or more name parts.
 * The constructor supplies the first part; add() appends further ones. */
class ASTName : public ASTNode {
public:
    typedef std::list<PycRef<PycString> > name_t;

private:
    name_t m_name;

public:
    ASTName(PycRef<PycString> name)
        : ASTNode(NODE_NAME)
    {
        m_name.push_back(name);
    }

    /* Returns a copy of the list of parts. */
    name_t name() const
    {
        return m_name;
    }

    /* Appends another part after the existing ones. */
    void add(PycRef<PycString> name)
    {
        m_name.push_back(name);
    }
};
/* Node of type NODE_DELETE: holds the node that is the delete target. */
class ASTDelete : public ASTNode {
    PycRef<ASTNode> m_value;

public:
    ASTDelete(PycRef<ASTNode> value)
        : ASTNode(NODE_DELETE), m_value(value)
    { }

    /* The target node. */
    PycRef<ASTNode> value() const
    {
        return m_value;
    }
};
#endif

224
ASTree.cpp Normal file
View File

@@ -0,0 +1,224 @@
#include "ASTree.h"
#include "FastStack.h"
#include "bytecode.h"
// Version tags OR-ed into each decoded opcode so that a single switch can
// tell the three python generations apart without writing the code 3 times.
static const int PY_1000 = 0x1000;
static const int PY_2000 = 0x2000;
static const int PY_3000 = 0x3000;
/* Decodes the bytecode of `code` one instruction at a time and collects the
 * statements it recognises into an ASTNodeList.
 *
 * Each opcode is tagged with PY_1000 / PY_2000 / PY_3000 according to
 * mod->majorVer() so one switch covers all generations.  The first opcode
 * without a handler is reported on stderr and ends decoding; whatever was
 * collected up to that point is returned. */
PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
{
    PycBuffer source(code->code()->value(), code->code()->length());
    ASTNodeList::list_t lines;
    // Python 1.x gets a fixed 20 slots instead of code->stackSize().
    FastStack stack((mod->majorVer() == 1) ? 20 : code->stackSize());
    stackhist_t stack_hist;

    int opcode, operand;
    int pos = 0;

    int opadd = 0;
    switch (mod->majorVer()) {
    case 1:
        opadd = PY_1000;
        break;
    case 2:
        opadd = PY_2000;
        break;
    case 3:
        opadd = PY_3000;
        break;
    default:
        break;
    }

    bool supported = true;
    while (supported && !source.atEof()) {
        bc_next(source, mod, opcode, operand, pos);
        opcode |= opadd;

        switch (opcode) {
        //case Py2k::STOP_CODE:
        //case Py2k::POP_TOP:
        //case Py2k::ROT_TWO:
        //case Py2k::ROT_THREE:
        //case Py2k::DUP_TOP:
        //case Py2k::ROT_FOUR:
        //case Py2k::NOP:
        //case Py2k::UNARY_POSITIVE:
        //case Py2k::UNARY_NEGATIVE:
        //case Py2k::UNARY_NOT:
        //case Py2k::UNARY_CONVERT:
        //case Py2k::UNARY_INVERT:
        //case Py2k::LIST_APPEND:
        //case Py2k::BINARY_POWER:
        //case Py2k::BINARY_MULTIPLY:
        //case Py2k::BINARY_DIVIDE:
        //case Py2k::BINARY_MODULO:
        //case Py2k::BINARY_ADD:
        //case Py2k::BINARY_SUBTRACT:
        //case Py2k::BINARY_SUBSCR:
        //case Py2k::BINARY_FLOOR_DIVIDE:
        //case Py2k::BINARY_TRUE_DIVIDE:
        //case Py2k::INPLACE_FLOOR_DIVIDE:
        //case Py2k::INPLACE_TRUE_DIVIDE:
        //case Py2k::SLICE_0:
        //case Py2k::SLICE_1:
        //case Py2k::SLICE_2:
        //case Py2k::SLICE_3:
        //case Py2k::STORE_SLICE_0:
        //case Py2k::STORE_SLICE_1:
        //case Py2k::STORE_SLICE_2:
        //case Py2k::STORE_SLICE_3:
        //case Py2k::DELETE_SLICE_0:
        //case Py2k::DELETE_SLICE_1:
        //case Py2k::DELETE_SLICE_2:
        //case Py2k::DELETE_SLICE_3:
        //case Py2k::STORE_MAP:
        //case Py2k::INPLACE_ADD:
        //case Py2k::INPLACE_SUBTRACT:
        //case Py2k::INPLACE_MULTIPLY:
        //case Py2k::INPLACE_DIVIDE:
        //case Py2k::INPLACE_MODULO:
        //case Py2k::STORE_SUBSCR:
        //case Py2k::DELETE_SUBSCR:
        //case Py2k::BINARY_LSHIFT:
        //case Py2k::BINARY_RSHIFT:
        //case Py2k::BINARY_AND:
        //case Py2k::BINARY_XOR:
        //case Py2k::BINARY_OR:
        //case Py2k::INPLACE_POWER:
        //case Py2k::GET_ITER:
        //case Py2k::PRINT_EXPR:
        //case Py2k::PRINT_ITEM:
        //case Py2k::PRINT_NEWLINE:
        //case Py2k::PRINT_ITEM_TO:
        //case Py2k::PRINT_NEWLINE_TO:
        //case Py2k::INPLACE_LSHIFT:
        //case Py2k::INPLACE_RSHIFT:
        //case Py2k::INPLACE_AND:
        //case Py2k::INPLACE_XOR:
        //case Py2k::INPLACE_OR:
        //case Py2k::BREAK_LOOP:
        //case Py2k::WITH_CLEANUP:
        //case Py2k::LOAD_LOCALS:
        //case Py2k::RETURN_VALUE:
        //case Py2k::IMPORT_STAR:
        //case Py2k::EXEC_STMT:
        //case Py2k::YIELD_VALUE:
        //case Py2k::POP_BLOCK:
        //case Py2k::END_FINALLY:
        //case Py2k::BUILD_CLASS:
        case (PY_1000 | Py1k::STORE_NAME):
        case (PY_2000 | Py2k::STORE_NAME):
        case (PY_3000 | Py3k::STORE_NAME):
            {
                // Take the value off the stack and emit "name = value".
                PycRef<ASTNode> value = stack.top();
                stack.pop();
                PycRef<ASTNode> name = new ASTName(code->getName(operand));
                lines.push_back(new ASTStore(value, name));
            }
            break;
        //case Py2k::DELETE_NAME:
        //case Py2k::UNPACK_SEQUENCE:
        //case Py2k::FOR_ITER:
        //case Py2k::STORE_ATTR:
        //case Py2k::DELETE_ATTR:
        //case Py2k::STORE_GLOBAL:
        //case Py2k::DELETE_GLOBAL:
        //case Py2k::DUP_TOPX:
        case (PY_1000 | Py1k::LOAD_CONST):
        case (PY_2000 | Py2k::LOAD_CONST):
        case (PY_3000 | Py3k::LOAD_CONST):
            // Push the referenced constant as an object node.
            stack.push(new ASTObject(code->getConst(operand)));
            break;
        //case Py2k::LOAD_NAME:
        //case Py2k::BUILD_TUPLE:
        //case Py2k::BUILD_LIST:
        //case Py2k::BUILD_MAP:
        //case Py2k::LOAD_ATTR:
        //case Py2k::COMPARE_OP:
        //case Py2k::IMPORT_NAME:
        //case Py2k::IMPORT_FROM:
        //case Py2k::JUMP_FORWARD:
        //case Py2k::JUMP_IF_FALSE:
        //case Py2k::JUMP_IF_TRUE:
        //case Py2k::JUMP_ABSOLUTE:
        //case Py2k::FOR_LOOP:
        //case Py2k::LOAD_GLOBAL:
        //case Py2k::CONTINUE_LOOP:
        //case Py2k::SETUP_LOOP:
        //case Py2k::SETUP_EXCEPT:
        //case Py2k::SETUP_FINALLY:
        //case Py2k::LOAD_FAST:
        //case Py2k::STORE_FAST:
        //case Py2k::DELETE_FAST:
        //case Py2k::SET_LINENO:
        //case Py2k::RAISE_VARARGS:
        //case Py2k::CALL_FUNCTION:
        //case Py2k::MAKE_FUNCTION:
        //case Py2k::BUILD_SLICE:
        //case Py2k::MAKE_CLOSURE:
        //case Py2k::LOAD_CLOSURE:
        //case Py2k::LOAD_DEREF:
        //case Py2k::STORE_DEREF:
        //case Py2k::CALL_FUNCTION_VAR:
        //case Py2k::CALL_FUNCTION_KW:
        //case Py2k::CALL_FUNCTION_VAR_KW:
        //case Py2k::EXTENDED_ARG:
        default:
            // No handler yet: name the opcode (low byte strips the version
            // tag) and stop decoding.
            switch (mod->majorVer()) {
            case 1:
                fprintf(stderr, "Unsupported opcode: %s\n", Py1k::OpcodeNames[opcode & 0xFF]);
                break;
            case 2:
                fprintf(stderr, "Unsupported opcode: %s\n", Py2k::OpcodeNames[opcode & 0xFF]);
                break;
            case 3:
                fprintf(stderr, "Unsupported opcode: %s\n", Py3k::OpcodeNames[opcode & 0xFF]);
                break;
            default:
                break;
            }
            supported = false;
            break;
        }
    }

    return new ASTNodeList(lines);
}
/* Writes one indentation unit to stdout for each of `indent` levels;
 * zero or negative levels write nothing. */
static void start_indent(int indent)
{
    for (int left = indent; left > 0; --left)
        printf(" ");
}
/* Prints `node` to stdout as source text.
 *
 * Lists print each child at the given `indent`; a store prints as
 * "dest = src" on its own indented line; objects go through print_const();
 * names print their parts joined with dots.  Any other node type prints an
 * "Unsupported Node type" message. */
static void print_src(PycRef<ASTNode> node, PycModule* mod, int indent = 0)
{
    const int kind = node->type();

    switch (kind) {
    case ASTNode::NODE_LIST:
        {
            ASTNodeList::list_t children = node.cast<ASTNodeList>()->nodes();
            ASTNodeList::list_t::iterator child = children.begin();
            while (child != children.end()) {
                print_src(*child, mod, indent);
                ++child;
            }
        }
        break;

    case ASTNode::NODE_STORE:
        {
            PycRef<ASTNode> target = node.cast<ASTStore>()->dest();
            PycRef<ASTNode> value = node.cast<ASTStore>()->src();

            start_indent(indent);
            print_src(target, mod);
            printf(" = ");
            print_src(value, mod);
            printf("\n");
        }
        break;

    case ASTNode::NODE_OBJECT:
        print_const(node.cast<ASTObject>()->object(), mod);
        break;

    case ASTNode::NODE_NAME:
        {
            ASTName::name_t parts = node.cast<ASTName>()->name();
            ASTName::name_t::iterator part = parts.begin();

            // First part bare, every following part prefixed with a dot.
            printf("%s", (*part)->value());
            for (++part; part != parts.end(); ++part)
                printf(".%s", (*part)->value());
        }
        break;

    default:
        printf("Unsupported Node type: %d\n", kind);
        break;
    }
}
/* Prints the whole tree to stdout, starting at the root with no indent. */
void ASTree::printSource(PycModule* mod) const
{
    // print_src() defaults its indent argument to 0.
    print_src(m_root, mod);
}

19
ASTree.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef _PYC_ASTREE_H
#define _PYC_ASTREE_H
#include "ASTNode.h"
/* Builds an AST from the bytecode of `code`, using `mod` to determine the
 * python version; implemented in ASTree.cpp. */
PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod);
/* Owns the root of the tree built from a module's code object and can
 * print it back out as source. */
class ASTree {
    PycRef<ASTNode> m_root;

public:
    /* Builds the tree from mod->code() and stores its root. */
    void load(PycModule* mod)
    {
        m_root = BuildFromCode(mod->code(), mod);
    }

    /* Prints the stored tree; implemented in ASTree.cpp. */
    void printSource(PycModule* mod) const;
};
#endif

47
FastStack.h Normal file
View File

@@ -0,0 +1,47 @@
#ifndef _PYC_FASTSTACK_H
#define _PYC_FASTSTACK_H
#include "ASTNode.h"
#include <stack>
class FastStack {
public:
FastStack(int size) : m_size(size), m_ptr(-1)
{ m_stack = new PycRef<ASTNode>[m_size]; }
FastStack(const FastStack& copy) : m_size(copy.m_size), m_ptr(copy.m_ptr)
{
m_stack = new PycRef<ASTNode>[m_size];
for (int i=0; i<m_size; i++)
m_stack[i] = copy.m_stack[i];
}
~FastStack()
{ delete[] m_stack; }
void push(PycRef<ASTNode> node)
{ m_stack[++m_ptr] = node; }
void pop()
{ m_stack[m_ptr--] = Node_NULL; }
PycRef<ASTNode> top() const
{ return m_stack[m_ptr]; }
void replace(const FastStack& copy)
{
for (int i=0; i<=copy.m_ptr; i++)
m_stack[i] = copy.m_stack[i];
for (int i=copy.m_ptr+1; i<=m_ptr; i++)
m_stack[i] = Node_NULL;
m_ptr = copy.m_ptr;
}
private:
PycRef<ASTNode>* m_stack;
int m_size, m_ptr;
};
typedef std::stack<FastStack> stackhist_t;
#endif

View File

@@ -1,5 +1,5 @@
CXX = g++
CXXFLAGS = -g -Wall -O2
CXXFLAGS = -g -Wall
COMMON = \
out/module.o \
@@ -9,10 +9,13 @@ COMMON = \
out/numeric.o \
out/code.o \
out/sequence.o \
out/string.o
out/string.o \
out/ASTree.o \
out/ASTNode.o
ALL = \
bin/pycdas
bin/pycdas \
bin/pycdc
PREFIX = /usr/local
@@ -28,6 +31,9 @@ install:
bin/pycdas: pycdas.cpp $(COMMON)
$(CXX) $(CXXFLAGS) $(COMMON) pycdas.cpp -o $@
bin/pycdc: pycdc.cpp $(COMMON)
$(CXX) $(CXXFLAGS) $(COMMON) pycdc.cpp -o $@
out/module.o: module.h module.cpp
$(CXX) $(CXXFLAGS) -c module.cpp -o $@
@@ -51,3 +57,9 @@ out/sequence.o: sequence.h sequence.cpp
out/string.o: string.h string.cpp
$(CXX) $(CXXFLAGS) -c string.cpp -o $@
out/ASTree.o: ASTree.h ASTree.cpp
$(CXX) $(CXXFLAGS) -c ASTree.cpp -o $@
out/ASTNode.o: ASTNode.h ASTNode.cpp
$(CXX) $(CXXFLAGS) -c ASTNode.cpp -o $@

View File

@@ -1,5 +1,4 @@
#include "bytecode.h"
#include "data.h"
#include "numeric.h"
const char* Py1k::OpcodeNames[256] = {
@@ -226,7 +225,7 @@ bool Py3k::IsCellArg(int opcode)
}
static void print_const(PycRef<PycObject> obj, PycModule* mod)
void print_const(PycRef<PycObject> obj, PycModule* mod)
{
switch (obj->type()) {
case PycObject::TYPE_STRING:
@@ -347,39 +346,45 @@ static void print_const(PycRef<PycObject> obj, PycModule* mod)
}
}
/* Reads the next instruction from `source`.
 *
 * On return `opcode` holds the instruction's opcode, `operand` its argument
 * (0 when the opcode takes none) and `pos` has been advanced by the number
 * of bytes consumed.  For python 2 and 3 an EXTENDED_ARG prefix is folded
 * into the following instruction: its 16-bit argument becomes the upper
 * half of that instruction's operand. */
void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos)
{
    opcode = source.getByte();
    operand = 0;
    bool haveExtArg = false;
    pos += 1;

    if ((mod->majorVer() == 2 && opcode == Py2k::EXTENDED_ARG) ||
        (mod->majorVer() == 3 && opcode == Py3k::EXTENDED_ARG)) {
        // Mask to 16 bits and shift as unsigned: left-shifting a negative
        // signed value is undefined before C++20, and get16() may yield a
        // negative number for arguments >= 0x8000.
        operand = static_cast<int>(
            static_cast<unsigned int>(source.get16() & 0xFFFF) << 16);
        opcode = source.getByte();
        haveExtArg = true;
        pos += 3;   // 2 argument bytes + the opcode byte just read
    }

    if (opcode >= HAVE_ARG) {
        // If we have an extended arg, we want to OR the lower part,
        // else we want the whole thing (in case it's negative). We use
        // the bool so that values between 0x8000 and 0xFFFF can be stored
        // without becoming negative
        if (haveExtArg)
            operand |= (source.get16() & 0xFFFF);
        else
            operand = source.get16();
        pos += 2;
    }
}
void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent)
{
PycBuffer source(code->code()->value(), code->code()->length());
int opcode, operand;
int pos = 0;
while (!source.atEof()) {
for (int i=0; i<indent; i++)
printf(" ");
printf("%-7d ", pos); // Current bytecode position
int opcode = source.getByte();
int operand = 0;
bool haveExtArg = false;
pos += 1;
if ((mod->majorVer() == 2 && opcode == Py2k::EXTENDED_ARG) ||
(mod->majorVer() == 3 && opcode == Py3k::EXTENDED_ARG)) {
operand = source.get16() << 16;
opcode = source.getByte();
haveExtArg = true;
pos += 3;
}
if (opcode >= HAVE_ARG) {
// If we have an extended arg, we want to OR the lower part,
// else we want the whole thing (in case it's negative). We use
// the bool so that values between 0x8000 and 0xFFFF can be stored
// without becoming negative
if (haveExtArg)
operand |= (source.get16() & 0xFFFF);
else
operand = source.get16();
pos += 2;
}
bc_next(source, mod, opcode, operand, pos);
if (mod->majorVer() == 1) {
printf("%-24s", Py1k::OpcodeNames[opcode]);

View File

@@ -1,5 +1,6 @@
#include "code.h"
#include "module.h"
#include "data.h"
// Opcodes >= this value have an argument after the opcode
#define HAVE_ARG 90
@@ -129,4 +130,6 @@ bool IsCellArg(int opcode);
}
void print_const(PycRef<PycObject> obj, PycModule* mod);
void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos);
void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent);

17
code.h
View File

@@ -6,6 +6,23 @@
class PycCode : public PycObject {
public:
enum CodeFlags {
CO_OPTIMIZED = 0x1,
CO_NEWLOCALS = 0x2,
CO_VARARGS = 0x4,
CO_VARKEYWORDS = 0x8,
CO_NESTED = 0x10,
CO_GENERATOR = 0x20,
CO_NOFREE = 0x40,
CO_GENERATOR_ALLOWED = 0x1000,
CO_FUTURE_DIVISION = 0x2000,
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000,
CO_FUTURE_WITH_STATEMENT = 0x8000,
CO_FUTURE_PRINT_FUNCTION = 0x10000,
CO_FUTURE_UNICODE_LITERALS = 0x20000,
CO_FUTURE_BARRY_AS_BDFL = 0x40000,
};
PycCode(int type = TYPE_CODE)
: PycObject(type), m_argCount(0), m_kwOnlyArgCount(0), m_numLocals(0),
m_stackSize(0), m_flags(0), m_firstLine(0) { }

21
pycdc.cpp Normal file
View File

@@ -0,0 +1,21 @@
#include "ASTree.h"
/* pycdc entry point: loads the compiled python file named by argv[1],
 * prints a two-line header and then the regenerated source.
 * Returns 1 when no file was given, 0 otherwise. */
int main(int argc, char* argv[])
{
    if (argc <= 1) {
        fprintf(stderr, "No input file specified\n");
        return 1;
    }

    PycModule mod;
    mod.loadFromFile(argv[1]);

    printf("# Source Generated with Decompyle++ pycdc\n");

    // The " Unicode" tag is only shown for pre-3.x unicode builds.
    const char* unicodeTag = "";
    if (mod.majorVer() < 3 && mod.isUnicode())
        unicodeTag = " Unicode";
    printf("# File: %s (Python %d.%d%s)\n", argv[1], mod.majorVer(), mod.minorVer(),
           unicodeTag);

    ASTree source;
    source.load(&mod);
    source.printSource(&mod);

    return 0;
}

View File

@@ -3,6 +3,15 @@
#include "module.h"
/* PycTuple */
/* Creates a new tuple object whose elements are copies of the references
 * in `items`, in the same order. */
PycRef<PycTuple> PycTuple::Build(const value_t& items)
{
    PycRef<PycTuple> result = new PycTuple();
    result->m_size = items.size();
    // assign() sizes the container and copies the range in one step.
    result->m_values.assign(items.begin(), items.end());
    return result;
}
void PycTuple::load(PycData* stream, PycModule* mod)
{
m_size = stream->get32();

View File

@@ -22,6 +22,7 @@ public:
typedef std::vector<PycRef<PycObject> > value_t;
PycTuple(int type = TYPE_TUPLE) : PycSequence(type) { }
static PycRef<PycTuple> Build(const value_t& items);
bool isEqual(PycRef<PycObject> obj) const;