From 98a50fd0423df6e41f5a08ee412c3c73034b76d3 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Sat, 25 Jul 2009 02:41:15 +0000 Subject: [PATCH] Added the rest of the marshalable types, and more info to the disassembly output (including a bytecode position counter to help with jump calculations) --- bytecode.cpp | 56 ++++++++++++++++++++++++++++++++++-------- code.h | 3 --- data.cpp | 14 +++++++++++ data.h | 7 ++++++ numeric.cpp | 67 +++++++++++++++++++++++++++++++++++++++++++++++--- numeric.h | 69 ++++++++++++++++++++++++++++++++++++++++++++-------- object.cpp | 26 ++++++++++---------- object.h | 3 --- pycdas.cpp | 39 +++++++++++++++++++++++------ sequence.cpp | 36 +++++++++++++++++++++++++++ sequence.h | 32 +++++++++++++++++------- string.cpp | 3 +++ string.h | 16 ------------ 13 files changed, 295 insertions(+), 76 deletions(-) diff --git a/bytecode.cpp b/bytecode.cpp index 0567ef4..e259f69 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -232,17 +232,20 @@ static void print_const(PycRef obj, PycModule* mod) case PycObject::TYPE_STRING: case PycObject::TYPE_STRINGREF: case PycObject::TYPE_INTERNED: - printf("\""); - OutputString(obj.cast(), QS_Double); - printf("\""); + if (mod->majorVer() == 3) + printf("b'"); + else + printf("'"); + OutputString(obj.cast(), QS_Single); + printf("'"); break; case PycObject::TYPE_UNICODE: if (mod->majorVer() == 3) - printf("\""); + printf("'"); else - printf("u\""); - OutputString(obj.cast(), QS_Double); - printf("\""); + printf("u'"); + OutputString(obj.cast(), QS_Single); + printf("'"); break; case PycObject::TYPE_TUPLE: { @@ -296,6 +299,21 @@ static void print_const(PycRef obj, PycModule* mod) printf("}"); } break; + case PycObject::TYPE_SET: + { + printf("{"); + PycSet::value_t values = obj.cast()->values(); + PycSet::value_t::iterator it = values.begin(); + if (it != values.end()) { + print_const(*it, mod); + while (++it != values.end()) { + printf(", "); + print_const(*it, mod); + } + } + printf("}"); + } + break; case PycObject::TYPE_NONE: printf("None"); break; @@ -311,6 +329,17 @@ static void print_const(PycRef obj, PycModule* mod) case PycObject::TYPE_FLOAT: printf("%s", obj.cast()->value()); break; + case PycObject::TYPE_COMPLEX: + printf("(%s+%sj)", obj.cast()->value(), + obj.cast()->imag()); + break; + case PycObject::TYPE_BINARY_FLOAT: + printf("%g", obj.cast()->value()); + break; + case PycObject::TYPE_BINARY_COMPLEX: + printf("(%g+%gj)", obj.cast()->value(), + obj.cast()->imag()); + break; case PycObject::TYPE_CODE: case PycObject::TYPE_CODE2: printf(" %s", obj.cast()->name()->value()); @@ -322,15 +351,23 @@ void bc_disasm(PycRef code, PycModule* mod, int indent) { PycBuffer source(code->code()->value(), code->code()->length()); + int pos = 0; while (!source.atEof()) { + for (int i=0; imajorVer() == 2 && opcode == Py2k::EXTENDED_ARG) || (mod->majorVer() == 3 && opcode == Py3k::EXTENDED_ARG)) { operand = source.get16() << 16; opcode = source.getByte(); haveExtArg = true; + pos += 3; } if (opcode >= HAVE_ARG) { // If we have an extended arg, we want to OR the lower part, @@ -341,10 +378,9 @@ void bc_disasm(PycRef code, PycModule* mod, int indent) operand |= (source.get16() & 0xFFFF); else operand = source.get16(); + pos += 2; } - for (int i=0; imajorVer() == 1) { printf("%-24s", Py1k::OpcodeNames[opcode]); } else if (mod->majorVer() == 2) { @@ -359,7 +395,7 @@ void bc_disasm(PycRef code, PycModule* mod, int indent) printf("%d: ", operand); print_const(code->getConst(operand), mod); } else if ((mod->majorVer() == 1 && Py1k::IsNameArg(opcode)) || - (mod->majorVer() == 1 && mod->minorVer() < 4 && Py1k::IsVarNameArg(opcode)) || + (mod->majorVer() == 1 && mod->minorVer() < 3 && Py1k::IsVarNameArg(opcode)) || (mod->majorVer() == 2 && Py2k::IsNameArg(opcode)) || (mod->majorVer() == 3 && Py3k::IsNameArg(opcode))) { printf("%d: %s", operand, code->getName(operand)->value()); diff --git a/code.h b/code.h index 1099dff..60b3b26 100644 --- a/code.h +++ b/code.h @@ -10,9 +10,6 @@ public: : PycObject(type), m_argCount(0), m_kwOnlyArgCount(0), m_numLocals(0), m_stackSize(0), m_flags(0), m_firstLine(0) { } - bool isType(int type) const - { return (type == TYPE_CODE) || (type == TYPE_CODE2) || PycObject::isType(type); } - void load(class PycData* stream, class PycModule* mod); int argCount() const { return m_argCount; } diff --git a/data.cpp b/data.cpp index 557be9e..6c6302e 100644 --- a/data.cpp +++ b/data.cpp @@ -23,6 +23,20 @@ int PycData::get32() ); } +Pyc_INT64 PycData::get64() +{ + /* Ensure endianness */ + return (Pyc_INT64)( ((Pyc_INT64)(getByte() & 0xFF) ) + | ((Pyc_INT64)(getByte() & 0xFF) << 8) + | ((Pyc_INT64)(getByte() & 0xFF) << 16) + | ((Pyc_INT64)(getByte() & 0xFF) << 24) + | ((Pyc_INT64)(getByte() & 0xFF) << 32) + | ((Pyc_INT64)(getByte() & 0xFF) << 40) + | ((Pyc_INT64)(getByte() & 0xFF) << 48) + | ((Pyc_INT64)(getByte() & 0xFF) << 56) + ); +} + /* PycFile */ PycFile::PycFile(const char* filename) diff --git a/data.h b/data.h index 225d724..6391964 100644 --- a/data.h +++ b/data.h @@ -3,6 +3,12 @@ #include +#ifdef WIN32 +typedef __int64 Pyc_INT64; +#else +typedef long long Pyc_INT64; +#endif + class PycData { public: PycData() { } @@ -15,6 +21,7 @@ public: virtual int getBuffer(int bytes, void* buffer) = 0; int get16(); int get32(); + Pyc_INT64 get64(); }; class PycFile : public PycData { diff --git a/numeric.cpp b/numeric.cpp index c61165a..7189b64 100644 --- a/numeric.cpp +++ b/numeric.cpp @@ -13,14 +13,27 @@ void PycInt::load(PycData* stream, PycModule*) /* PycLong */ void PycLong::load(PycData* stream, PycModule*) { - m_size = stream->get32(); - int actualSize = m_size & 0x7FFFFFFF; - for (int i=0; iget16()); + if (type() == TYPE_INT64) { + int lo = stream->get32(); + int hi = stream->get32(); + m_value.push_back((lo ) & 0xFFFF); + m_value.push_back((lo >> 16) & 0xFFFF); + m_value.push_back((hi ) & 0xFFFF); + m_value.push_back((hi >> 16) & 0xFFFF); + m_size = (hi & 0x80000000) != 0 ? -4 : 4; + } else { + m_size = stream->get32(); + int actualSize = m_size & 0x7FFFFFFF; + for (int i=0; iget16()); + } } bool PycLong::isEqual(PycRef obj) const { + if (type() != obj->type()) + return false; + PycRef longObj = obj.cast(); if (m_size != longObj->m_size) return false; @@ -51,8 +64,54 @@ void PycFloat::load(PycData* stream, PycModule*) bool PycFloat::isEqual(PycRef obj) const { + if (type() != obj->type()) + return false; + PycRef floatObj = obj.cast(); if (m_value == floatObj->m_value) return true; return (strcmp(m_value, floatObj->m_value) == 0); } + + +/* PycComplex */ +void PycComplex::load(PycData* stream, PycModule* mod) +{ + PycFloat::load(stream, mod); + + int len = stream->getByte(); + if (m_imag) delete[] m_imag; + if (len > 0) { + m_imag = new char[len+1]; + stream->getBuffer(len, m_imag); + m_imag[len] = 0; + } else { + m_imag = 0; + } +} + +bool PycComplex::isEqual(PycRef obj) const +{ + if (!PycFloat::isEqual(obj)) + return false; + + PycRef floatObj = obj.cast(); + if (m_imag == floatObj->m_imag) + return true; + return (strcmp(m_imag, floatObj->m_imag) == 0); +} + + +/* PycCFloat */ +void PycCFloat::load(PycData* stream, PycModule*) +{ + m_value = (double)stream->get64(); +} + + +/* PycCComplex */ +void PycCComplex::load(PycData* stream, PycModule* mod) +{ + PycCFloat::load(stream, mod); + m_imag = (double)stream->get64(); +} diff --git a/numeric.h b/numeric.h index 6ee0da9..c2c08c7 100644 --- a/numeric.h +++ b/numeric.h @@ -9,11 +9,11 @@ public: PycInt(int value = 0, int type = TYPE_INT) : PycObject(type), m_value(value) { } - bool isType(int type) const - { return (type == TYPE_INT) || PycObject::isType(type); } - bool isEqual(PycRef obj) const - { return m_value == obj.cast()->m_value; } + { + return (type() == obj->type()) && + (m_value == obj.cast()->m_value); + } void load(class PycData* stream, class PycModule* mod); @@ -28,9 +28,6 @@ public: PycLong(int type = TYPE_LONG) : PycObject(type), m_size(0) { } - bool isType(int type) const - { return (type == TYPE_LONG) || PycObject::isType(type); } - bool isEqual(PycRef obj) const; void load(class PycData* stream, class PycModule* mod); @@ -50,9 +47,6 @@ public: ~PycFloat() { if (m_value) delete[] m_value; } - bool isType(int type) const - { return (type == TYPE_FLOAT) || PycObject::isType(type); } - bool isEqual(PycRef obj) const; void load(class PycData* stream, class PycModule* mod); @@ -63,4 +57,59 @@ private: char* m_value; // Floats are stored as strings }; +class PycComplex : public PycFloat { +public: + PycComplex(int type = TYPE_COMPLEX) + : PycFloat(type), m_imag(0) { } + + ~PycComplex() { if (m_imag) delete[] m_imag; } + + bool isEqual(PycRef obj) const; + + void load(class PycData* stream, class PycModule* mod); + + const char* imag() const { return m_imag; } + +private: + char* m_imag; +}; + +class PycCFloat : public PycObject { +public: + PycCFloat(int type = TYPE_BINARY_FLOAT) + : PycObject(type), m_value(0.0) { } + + bool isEqual(PycRef obj) const + { + return (type() == obj->type()) && + (m_value == obj.cast()->m_value); + } + + void load(class PycData* stream, class PycModule* mod); + + double value() const { return m_value; } + +private: + double m_value; +}; + +class PycCComplex : public PycCFloat { +public: + PycCComplex(int type = TYPE_BINARY_COMPLEX) + : PycCFloat(type), m_imag(0.0) { } + + bool isEqual(PycRef obj) const + { + return (PycCFloat::isEqual(obj)) && + (m_imag == obj.cast()->m_imag); + } + + void load(class PycData* stream, class PycModule* mod); + + double imag() const { return m_imag; } + +private: + double m_imag; +}; + #endif diff --git a/object.cpp b/object.cpp index 1781062..3fdc3cf 100644 --- a/object.cpp +++ b/object.cpp @@ -30,16 +30,16 @@ PycRef CreateObject(int type) return Pyc_Ellipsis; case PycObject::TYPE_INT: return new PycInt(); - //case PycObject::TYPE_INT64: - // ... + case PycObject::TYPE_INT64: + return new PycLong(PycObject::TYPE_INT64); case PycObject::TYPE_FLOAT: return new PycFloat(); - //case PycObject::TYPE_BINARY_FLOAT: - // ... - //case PycObject::TYPE_COMPLEX: - // ... - //case PycObject::TYPE_BINARY_COMPLEX: - // ... + case PycObject::TYPE_BINARY_FLOAT: + return new PycCFloat(); + case PycObject::TYPE_COMPLEX: + return new PycComplex(); + case PycObject::TYPE_BINARY_COMPLEX: + return new PycCComplex(); case PycObject::TYPE_LONG: return new PycLong(); case PycObject::TYPE_STRING: @@ -58,11 +58,11 @@ PycRef CreateObject(int type) case PycObject::TYPE_CODE2: return new PycCode(); case PycObject::TYPE_UNICODE: - return new PycUnicode(); - //case PycObject::TYPE_SET: - // ... - //case PycObject::TYPE_FROZENSET: - // ... + return new PycString(PycObject::TYPE_UNICODE); + case PycObject::TYPE_SET: + return new PycSet(); + case PycObject::TYPE_FROZENSET: + return new PycSet(PycObject::TYPE_FROZENSET); default: fprintf(stderr, "CreateObject: Got unsupported type 0x%X\n", type); return (PycObject*)0; diff --git a/object.h b/object.h index d966ff4..243998d 100644 --- a/object.h +++ b/object.h @@ -80,9 +80,6 @@ public: int type() const { return (this) ? m_type : TYPE_NULL; } - virtual bool isType(int type) const - { return (this) ? type == m_type : type == TYPE_NULL; } - virtual bool isEqual(PycRef obj) const { return (this == (PycObject*)obj); } diff --git a/pycdas.cpp b/pycdas.cpp index 35ec88c..84728c5 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -71,17 +71,20 @@ void output_object(PycRef obj, PycModule* mod, int indent) case PycObject::TYPE_STRING: case PycObject::TYPE_STRINGREF: case PycObject::TYPE_INTERNED: - iprintf(indent, "\""); - OutputString(obj.cast(), QS_Double); - printf("\"\n"); + if (mod->majorVer() == 3) + iprintf(indent, "b'"); + else + iprintf(indent, "'"); + OutputString(obj.cast(), QS_Single); + printf("'\n"); break; case PycObject::TYPE_UNICODE: if (mod->majorVer() == 3) - iprintf(indent, "\""); + iprintf(indent, "'"); else - iprintf(indent, "u\""); - OutputString(obj.cast(), QS_Double); - printf("\"\n"); + iprintf(indent, "u'"); + OutputString(obj.cast(), QS_Single); + printf("'\n"); break; case PycObject::TYPE_TUPLE: { @@ -116,6 +119,15 @@ void output_object(PycRef obj, PycModule* mod, int indent) iprintf(indent, "}\n"); } break; + case PycObject::TYPE_SET: + { + iprintf(indent, "{\n"); + PycSet::value_t values = obj.cast()->values(); + for (PycSet::value_t::iterator i = values.begin(); i != values.end(); i++) + output_object(*i, mod, indent + 1); + iprintf(indent, "}\n"); + } + break; case PycObject::TYPE_NONE: iprintf(indent, "None\n"); break; @@ -131,6 +143,17 @@ void output_object(PycRef obj, PycModule* mod, int indent) case PycObject::TYPE_FLOAT: iprintf(indent, "%s\n", obj.cast()->value()); break; + case PycObject::TYPE_COMPLEX: + iprintf(indent, "(%s+%sj)\n", obj.cast()->value(), + obj.cast()->imag()); + break; + case PycObject::TYPE_BINARY_FLOAT: + iprintf(indent, "%g\n", obj.cast()->value()); + break; + case PycObject::TYPE_BINARY_COMPLEX: + iprintf(indent, "(%g+%gj)\n", obj.cast()->value(), + obj.cast()->imag()); + break; default: iprintf(indent, "\n", obj->type()); } @@ -146,7 +169,7 @@ int main(int argc, char* argv[]) PycModule mod; mod.loadFromFile(argv[1]); printf("%s (Python %d.%d%s)\n", argv[1], mod.majorVer(), mod.minorVer(), - mod.isUnicode() ? " -U" : ""); + (mod.majorVer() < 3 && mod.isUnicode()) ? " -U" : ""); output_object(mod.code().cast(), &mod, 0); return 0; diff --git a/sequence.cpp b/sequence.cpp index 81b3907..076755c 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -13,6 +13,9 @@ void PycTuple::load(PycData* stream, PycModule* mod) bool PycTuple::isEqual(PycRef obj) const { + if (type() != obj->type()) + return false; + PycRef tupleObj = obj.cast(); if (m_size != tupleObj->m_size) return false; @@ -37,6 +40,9 @@ void PycList::load(PycData* stream, PycModule* mod) bool PycList::isEqual(PycRef obj) const { + if (type() != obj->type()) + return false; + PycRef listObj = obj.cast(); if (m_size != listObj->m_size) return false; @@ -67,6 +73,9 @@ void PycDict::load(PycData* stream, PycModule* mod) bool PycDict::isEqual(PycRef obj) const { + if (type() != obj->type()) + return false; + PycRef dictObj = obj.cast(); if (m_size != dictObj->m_size) return false; @@ -100,3 +109,30 @@ PycRef PycDict::get(PycRef key) const } return Pyc_NULL; // Disassembly shouldn't get non-existant keys } + + +/* PycSet */ +void PycSet::load(PycData* stream, PycModule* mod) +{ + m_size = stream->get32(); + for (int i=0; i obj) const +{ + if (type() != obj->type()) + return false; + + PycRef setObj = obj.cast(); + if (m_size != setObj->m_size) + return false; + value_t::const_iterator it1 = m_values.begin(); + value_t::const_iterator it2 = setObj->m_values.begin(); + while (it1 != m_values.end()) { + if (!(*it1)->isEqual(*it2)) + return false; + ++it1, ++it2; + } + return true; +} diff --git a/sequence.h b/sequence.h index 8e7662c..941320a 100644 --- a/sequence.h +++ b/sequence.h @@ -4,6 +4,7 @@ #include "object.h" #include #include +#include class PycSequence : public PycObject { public: @@ -22,9 +23,6 @@ public: PycTuple(int type = TYPE_TUPLE) : PycSequence(type) { } - bool isType(int type) const - { return (type == TYPE_TUPLE) || PycObject::isType(type); } - bool isEqual(PycRef obj) const; void load(class PycData* stream, class PycModule* mod); @@ -42,9 +40,6 @@ public: PycList(int type = TYPE_LIST) : PycSequence(type) { } - bool isType(int type) const - { return (type == TYPE_LIST) || PycObject::isType(type); } - bool isEqual(PycRef obj) const; void load(class PycData* stream, class PycModule* mod); @@ -68,9 +63,6 @@ public: PycDict(int type = TYPE_DICT) : PycSequence(type) { } - bool isType(int type) const - { return (type == TYPE_DICT) || PycObject::isType(type); } - bool isEqual(PycRef obj) const; void load(class PycData* stream, class PycModule* mod); @@ -91,4 +83,26 @@ private: value_t m_values; }; +class PycSet : public PycSequence { +public: + typedef std::set > value_t; + + PycSet(int type = TYPE_SET) : PycSequence(type) { } + + bool isEqual(PycRef obj) const; + + void load(class PycData* stream, class PycModule* mod); + + value_t values() const { return m_values; } + PycRef get(int idx) const + { + value_t::const_iterator it = m_values.begin(); + for (int i=0; i obj) const { + if (type() != obj->type()) + return false; + PycRef strObj = obj.cast(); if (m_value == strObj->m_value) return true; diff --git a/string.h b/string.h index 3ee5297..d572751 100644 --- a/string.h +++ b/string.h @@ -15,12 +15,6 @@ public: ~PycString() { if (m_value) delete[] m_value; } - bool isType(int type) const - { - return (type == TYPE_STRING) || (type == TYPE_INTERNED) || - (type == TYPE_STRINGREF) || PycObject::isType(type); - } - bool isEqual(PycRef obj) const; void load(class PycData* stream, class PycModule* mod); @@ -33,16 +27,6 @@ private: int m_length; }; -class PycUnicode : public PycString { -public: - PycUnicode(int type = TYPE_UNICODE) : PycString(type) { } - - bool isType(int type) const - { - return (type == TYPE_UNICODE) || PycString::isType(type); - } -}; - void OutputString(PycRef str, QuoteStyle style, FILE* F = stdout); #endif