diff --git a/Makefile b/Makefile
index bfe88a3..04ebc99 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CXX = g++
-CXXFLAGS = -g -Wall
+CXXFLAGS = -g -Wall -O2
 
 COMMON = \
 	out/module.o \
diff --git a/bytecode.cpp b/bytecode.cpp
index 45b784e..106e03b 100644
--- a/bytecode.cpp
+++ b/bytecode.cpp
@@ -1,5 +1,6 @@
 #include "bytecode.h"
 #include "data.h"
+#include "numeric.h"
 
 const char* Py1k::OpcodeNames[256] = {
     "STOP_CODE", "POP_TOP", "ROT_TWO", "ROT_THREE", "DUP_TOP",
@@ -149,21 +150,177 @@ const char* Py3k::OpcodeNames[256] = {
     "<248>", "<249>", "<250>", "<251>", "<252>", "<253>", "<254>", "<255>",
 };
 
+// Operand classifiers, one set per Python major version.  Each reports
+// whether the operand of `opcode` indexes the code object's constants,
+// names, local variable names or cell variables (used by bc_disasm).
+bool Py1k::IsConstArg(int opcode)
+{
+    return (opcode == Py1k::LOAD_CONST) || (opcode == Py1k::RESERVE_FAST);
+}
+
+bool Py1k::IsNameArg(int opcode)
+{
+    return (opcode == Py1k::DELETE_ATTR) || (opcode == Py1k::DELETE_GLOBAL) ||
+           (opcode == Py1k::DELETE_NAME) || (opcode == Py1k::IMPORT_FROM) ||
+           (opcode == Py1k::IMPORT_NAME) || (opcode == Py1k::LOAD_ATTR) ||
+           (opcode == Py1k::LOAD_GLOBAL) || (opcode == Py1k::LOAD_LOCAL) ||
+           (opcode == Py1k::LOAD_NAME) || (opcode == Py1k::STORE_ATTR) ||
+           (opcode == Py1k::STORE_GLOBAL) || (opcode == Py1k::STORE_NAME);
+}
+
+bool Py1k::IsVarNameArg(int opcode)
+{
+    return (opcode == Py1k::DELETE_FAST) || (opcode == Py1k::LOAD_FAST) ||
+           (opcode == Py1k::STORE_FAST);
+}
+
+bool Py1k::IsCellArg(int opcode)
+{
+    return false;
+}
+
+bool Py2k::IsConstArg(int opcode)
+{
+    return (opcode == Py2k::LOAD_CONST);
+}
+
+bool Py2k::IsNameArg(int opcode)
+{
+    return (opcode == Py2k::DELETE_ATTR) || (opcode == Py2k::DELETE_GLOBAL) ||
+           (opcode == Py2k::DELETE_NAME) || (opcode == Py2k::IMPORT_FROM) ||
+           (opcode == Py2k::IMPORT_NAME) || (opcode == Py2k::LOAD_ATTR) ||
+           (opcode == Py2k::LOAD_GLOBAL) || (opcode == Py2k::LOAD_NAME) ||
+           (opcode == Py2k::STORE_ATTR) || (opcode == Py2k::STORE_GLOBAL) ||
+           (opcode == Py2k::STORE_NAME);
+}
+
+bool Py2k::IsVarNameArg(int opcode)
+{
+    return (opcode == Py2k::DELETE_FAST) || (opcode == Py2k::LOAD_FAST) ||
+           (opcode == Py2k::STORE_FAST);
+}
+
+bool Py2k::IsCellArg(int opcode)
+{
+    return (opcode == Py2k::LOAD_CLOSURE) || (opcode == Py2k::LOAD_DEREF) ||
+           (opcode == Py2k::STORE_DEREF);
+}
+
+bool Py3k::IsConstArg(int opcode)
+{
+    return (opcode == Py3k::LOAD_CONST);
+}
+
+bool Py3k::IsNameArg(int opcode)
+{
+    return (opcode == Py3k::DELETE_ATTR) || (opcode == Py3k::DELETE_GLOBAL) ||
+           (opcode == Py3k::DELETE_NAME) || (opcode == Py3k::IMPORT_FROM) ||
+           (opcode == Py3k::IMPORT_NAME) || (opcode == Py3k::LOAD_ATTR) ||
+           (opcode == Py3k::LOAD_GLOBAL) || (opcode == Py3k::LOAD_NAME) ||
+           (opcode == Py3k::STORE_ATTR) || (opcode == Py3k::STORE_GLOBAL) ||
+           (opcode == Py3k::STORE_NAME);
+}
+
+bool Py3k::IsVarNameArg(int opcode)
+{
+    return (opcode == Py3k::DELETE_FAST) || (opcode == Py3k::LOAD_FAST) ||
+           (opcode == Py3k::STORE_FAST);
+}
+
+bool Py3k::IsCellArg(int opcode)
+{
+    return (opcode == Py3k::LOAD_CLOSURE) || (opcode == Py3k::LOAD_DEREF) ||
+           (opcode == Py3k::STORE_DEREF);
+}
+
+
+// Print a constant operand to stdout.  Tuples and lists are printed
+// recursively; object types without a case below print nothing.
+static void print_const(PycRef<PycObject> obj)
+{
+    switch (obj->type()) {
+    case PycObject::TYPE_STRING:
+    case PycObject::TYPE_STRINGREF:
+    case PycObject::TYPE_INTERNED:
+        printf("\"");
+        OutputString(obj.cast<PycString>(), QS_Double);
+        printf("\"");
+        break;
+    case PycObject::TYPE_TUPLE:
+        {
+            printf("(");
+            PycTuple::value_t values = obj.cast<PycTuple>()->values();
+            PycTuple::value_t::iterator it = values.begin();
+            if (it != values.end()) {
+                print_const(*it);
+                while (++it != values.end()) {
+                    printf(", ");
+                    print_const(*it);
+                }
+            }
+            printf(")");
+        }
+        break;
+    case PycObject::TYPE_LIST:
+        {
+            printf("[");
+            PycList::value_t values = obj.cast<PycList>()->values();
+            PycList::value_t::iterator it = values.begin();
+            if (it != values.end()) {
+                print_const(*it);
+                while (++it != values.end()) {
+                    printf(", ");
+                    print_const(*it);
+                }
+            }
+            printf("]");
+        }
+        break;
+    case PycObject::TYPE_NONE:
+        printf("None");
+        break;
+    case PycObject::TYPE_TRUE:
+        printf("True");
+        break;
+    case PycObject::TYPE_FALSE:
+        printf("False");
+        break;
+    case PycObject::TYPE_INT:
+        printf("%d", obj.cast<PycInt>()->value());
+        break;
+    case PycObject::TYPE_FLOAT:
+        printf("%s", obj.cast<PycFloat>()->value());
+        break;
+    case PycObject::TYPE_CODE:
+        printf("<CODE> %s", obj.cast<PycCode>()->name()->value());
+        break;
+    }
+}
+
 void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent)
 {
     PycBuffer source(code->code()->value(), code->code()->length());
 
-    int operand = 0;
     while (!source.atEof()) {
         int opcode = source.getByte();
-        bool extArg = false;
+        int operand = 0;
+        bool haveExtArg = false;
         if ((mod->majorVer() == 2 && opcode == Py2k::EXTENDED_ARG) ||
             (mod->majorVer() == 3 && opcode == Py3k::EXTENDED_ARG)) {
-            extArg = true;
+            operand = source.get16() << 16;
             opcode = source.getByte();
+            haveExtArg = true;
+        }
+        if (opcode >= HAVE_ARG) {
+            // If we have an extended arg, we want to OR the lower part,
+            // else we want the whole thing (in case it's negative).  We use
+            // the bool so that values between 0x8000 and 0xFFFF can be stored
+            // without becoming negative
+            if (haveExtArg)
+                operand |= (source.get16() & 0xFFFF);
+            else
+                operand = source.get16();
         }
-        if (opcode >= HAVE_ARG)
-            operand = extArg ? source.get32() : source.get16();
 
         for (int i=0; i<indent; i++)
             printf("    ");
@@ -174,9 +330,30 @@ void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent)
         } else if (mod->majorVer() == 3) {
             printf("%-24s", Py3k::OpcodeNames[opcode]);
         }
-        if (opcode >= HAVE_ARG)
-            printf("%d\n", operand);
-        else
-            printf("\n");
+        if (opcode >= HAVE_ARG) {
+            if ((mod->majorVer() == 1 && Py1k::IsConstArg(opcode)) ||
+                (mod->majorVer() == 2 && Py2k::IsConstArg(opcode)) ||
+                (mod->majorVer() == 3 && Py3k::IsConstArg(opcode))) {
+                printf("%d: ", operand);
+                print_const(code->getConst(operand));
+            } else if ((mod->majorVer() == 1 && Py1k::IsNameArg(opcode)) ||
+                       (mod->majorVer() == 2 && Py2k::IsNameArg(opcode)) ||
+                       (mod->majorVer() == 3 && Py3k::IsNameArg(opcode))) {
+                printf("%d: %s", operand, code->getName(operand)->value());
+            } else if ((mod->majorVer() == 1 && Py1k::IsVarNameArg(opcode)) ||
+                       (mod->majorVer() == 2 && Py2k::IsVarNameArg(opcode)) ||
+                       (mod->majorVer() == 3 && Py3k::IsVarNameArg(opcode))) {
+                printf("%d: %s", operand, code->getVarName(operand)->value());
+            } else if ((mod->majorVer() == 1 && Py1k::IsCellArg(opcode)) ||
+                       (mod->majorVer() == 2 && Py2k::IsCellArg(opcode)) ||
+                       (mod->majorVer() == 3 && Py3k::IsCellArg(opcode))) {
+                // Cell operands resolve through cellvars/freevars, not consts.
+                printf("%d: ", operand);
+                print_const(code->getCellVar(operand));
+            } else {
+                printf("%d", operand);
+            }
+        }
+        printf("\n");
     }
 }
diff --git a/bytecode.h b/bytecode.h
index df0477d..43ac2d2 100644
--- a/bytecode.h
+++ b/bytecode.h
@@ -35,6 +35,11 @@ enum Opcodes {
 
 extern const char* OpcodeNames[256];
 
+bool IsConstArg(int opcode);
+bool IsNameArg(int opcode);
+bool IsVarNameArg(int opcode);
+bool IsCellArg(int opcode);
+
 }
 
 namespace Py2k {
@@ -75,6 +80,11 @@ enum Opcodes {
 
 extern const char* OpcodeNames[256];
 
+bool IsConstArg(int opcode);
+bool IsNameArg(int opcode);
+bool IsVarNameArg(int opcode);
+bool IsCellArg(int opcode);
+
 }
 
 namespace Py3k {
@@ -82,7 +92,7 @@ namespace Py3k {
 enum Opcodes {
     STOP_CODE = 0, POP_TOP, ROT_TWO, ROT_THREE, DUP_TOP, ROT_FOUR,
     NOP = 9, UNARY_POSITIVE, UNARY_NEGATIVE, UNARY_NOT,
-    UNARY_INVERT = 15, SET_ADD2 = 17, LIST_APPEND2,
+    UNARY_INVERT = 15, SET_ADD = 17, LIST_APPEND,
     BINARY_POWER = 19, BINARY_MULTIPLY, BINARY_MODULO = 22, BINARY_ADD,
     BINARY_SUBTRACT, BINARY_SUBSCR, BINARY_FLOOR_DIVIDE, BINARY_TRUE_DIVIDE,
     INPLACE_FLOOR_DIVIDE, INPLACE_TRUE_DIVIDE,
@@ -108,11 +118,16 @@ enum Opcodes {
     MAKE_CLOSURE, LOAD_CLOSURE, LOAD_DEREF, STORE_DEREF,
     CALL_FUNCTION_VAR = 140, CALL_FUNCTION_KW, CALL_FUNCTION_VAR_KW,
     EXTENDED_ARG,
-    LIST_APPEND = 145, SET_ADD, MAP_ADD,
+    LIST_APPEND_A = 145, SET_ADD_A, MAP_ADD_A,
 };
 
 extern const char* OpcodeNames[256];
 
+bool IsConstArg(int opcode);
+bool IsNameArg(int opcode);
+bool IsVarNameArg(int opcode);
+bool IsCellArg(int opcode);
+
 }
 
 void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent);
diff --git a/code.h b/code.h
index ceb80f7..b57afae 100644
--- a/code.h
+++ b/code.h
@@ -31,6 +31,23 @@ public:
     int firstLine() const { return m_firstLine; }
     PycRef<PycString> lnTable() const { return m_lnTable; }
 
+    PycRef<PycObject> getConst(int idx) const
+    { return m_consts->values()[idx]; }
+
+    PycRef<PycString> getName(int idx) const
+    { return m_names->values()[idx].cast<PycString>(); }
+
+    PycRef<PycString> getVarName(int idx) const
+    { return m_varNames->values()[idx].cast<PycString>(); }
+
+    // Indices below m_cellVars->size() select a cell variable; the rest
+    // select free variables, so idx == size() is the first free variable.
+    PycRef<PycObject> getCellVar(int idx) const
+    {
+        return (idx >= m_cellVars->size()) ? m_freeVars->values()[idx - m_cellVars->size()]
+                                           : m_cellVars->values()[idx];
+    }
+
 private:
     int m_argCount, m_kwOnlyArgCount, m_numLocals, m_stackSize, m_flags;
     PycRef<PycString> m_code;
diff --git a/pycdas.cpp b/pycdas.cpp
index 24eefba..f670890 100644
--- a/pycdas.cpp
+++ b/pycdas.cpp
@@ -65,7 +65,9 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
     case PycObject::TYPE_STRING:
     case PycObject::TYPE_STRINGREF:
     case PycObject::TYPE_INTERNED:
-        iprintf(indent, "\"%s\"\n", obj.cast<PycString>()->value());
+        iprintf(indent, "\"");
+        OutputString(obj.cast<PycString>(), QS_Double);
+        printf("\"\n");
         break;
     case PycObject::TYPE_TUPLE:
         {
diff --git a/string.cpp b/string.cpp
index 1ed4372..124028d 100644
--- a/string.cpp
+++ b/string.cpp
@@ -32,3 +32,45 @@ void PycString::load(PycData* stream, PycModule* mod)
         mod->intern(this);
     }
 }
+
+
+// Write str to F with backslash escapes.  Bytes below 0x20 or at/above
+// 0x7F become \r, \n, \t or a two-digit \xhh escape; the quote character
+// matching `style` is escaped.  The QS_Block* styles emit newlines
+// literally.  A null string value writes nothing.
+void OutputString(PycRef<PycString> str, QuoteStyle style, FILE* F)
+{
+    const char* ch = str->value();
+    if (ch == 0)
+        return;
+    while (*ch != 0) {
+        // Plain char may be signed: classify and print the byte as unsigned
+        // so values >= 0x80 are neither treated as control characters nor
+        // sign-extended by %x.
+        const unsigned char byte = static_cast<unsigned char>(*ch);
+        if (byte < 0x20) {
+            if (byte == '\r') {
+                fprintf(F, "\\r");
+            } else if (byte == '\n') {
+                if (style == QS_BlockSingle || style == QS_BlockDouble)
+                    fputc('\n', F);
+                else
+                    fprintf(F, "\\n");
+            } else if (byte == '\t') {
+                fprintf(F, "\\t");
+            } else {
+                fprintf(F, "\\x%02x", byte);
+            }
+        } else if (byte >= 0x7F) {
+            fprintf(F, "\\x%02x", byte);
+        } else {
+            if (style == QS_Single && byte == '\'')
+                fprintf(F, "\\'");
+            else if (style == QS_Double && byte == '"')
+                fprintf(F, "\\\"");
+            else
+                fputc(byte, F);
+        }
+        ch++;
+    }
+}
diff --git a/string.h b/string.h
index b65345e..19869c5 100644
--- a/string.h
+++ b/string.h
@@ -2,6 +2,11 @@
 #define _PYC_STRING_H
 
 #include "object.h"
+#include <cstdio>
+
+enum QuoteStyle {
+    QS_Single, QS_Double, QS_BlockSingle, QS_BlockDouble
+};
 
 class PycString : public PycObject {
 public:
@@ -26,4 +31,6 @@ private:
     int m_length;
 };
 
+void OutputString(PycRef<PycString> str, QuoteStyle style, FILE* F = stdout);
+
 #endif