Added the rest of the marshalable types, and more info to the disassembly output (including a bytecode position counter to help with jump calculations)

2009-07-25 02:41:15 +00:00
parent 3b389f4b00
commit 98a50fd042
13 changed files with 295 additions and 76 deletions
--- a/bytecode.cpp
+++ b/bytecode.cpp
@@ -232,17 +232,20 @@ static void print_const(PycRef<PycObject> obj, PycModule* mod)
    case PycObject::TYPE_STRING:
    case PycObject::TYPE_STRINGREF:
    case PycObject::TYPE_INTERNED:
-        printf("\"");
-        OutputString(obj.cast<PycString>(), QS_Double);
-        printf("\"");
+        if (mod->majorVer() == 3)
+            printf("b'");
+        else
+            printf("'");
+        OutputString(obj.cast<PycString>(), QS_Single);
+        printf("'");
        break;
    case PycObject::TYPE_UNICODE:
        if (mod->majorVer() == 3)
-            printf("\"");
+            printf("'");
        else
-            printf("u\"");
-        OutputString(obj.cast<PycString>(), QS_Double);
-        printf("\"");
+            printf("u'");
+        OutputString(obj.cast<PycString>(), QS_Single);
+        printf("'");
        break;
    case PycObject::TYPE_TUPLE:
        {
@@ -296,6 +299,21 @@ static void print_const(PycRef<PycObject> obj, PycModule* mod)
            printf("}");
        }
        break;
+    case PycObject::TYPE_SET:
+        {
+            printf("{");
+            PycSet::value_t values = obj.cast<PycSet>()->values();
+            PycSet::value_t::iterator it = values.begin();
+            if (it != values.end()) {
+                print_const(*it, mod);
+                while (++it != values.end()) {
+                    printf(", ");
+                    print_const(*it, mod);
+                }
+            }
+            printf("}");
+        }
+        break;
    case PycObject::TYPE_NONE:
        printf("None");
        break;
@@ -311,6 +329,17 @@ static void print_const(PycRef<PycObject> obj, PycModule* mod)
    case PycObject::TYPE_FLOAT:
        printf("%s", obj.cast<PycFloat>()->value());
        break;
+    case PycObject::TYPE_COMPLEX:
+        printf("(%s+%sj)", obj.cast<PycComplex>()->value(),
+                           obj.cast<PycComplex>()->imag());
+        break;
+    case PycObject::TYPE_BINARY_FLOAT:
+        printf("%g", obj.cast<PycCFloat>()->value());
+        break;
+    case PycObject::TYPE_BINARY_COMPLEX:
+        printf("(%g+%gj)", obj.cast<PycCComplex>()->value(),
+                           obj.cast<PycCComplex>()->imag());
+        break;
    case PycObject::TYPE_CODE:
    case PycObject::TYPE_CODE2:
        printf("<CODE> %s", obj.cast<PycCode>()->name()->value());
@@ -322,15 +351,23 @@ void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent)
 {
    PycBuffer source(code->code()->value(), code->code()->length());

+    int pos = 0;
    while (!source.atEof()) {
+        for (int i=0; i<indent; i++)
+            printf("    ");
+        printf("%-7d ", pos);   // Current bytecode position
+
        int opcode = source.getByte();
        int operand = 0;
        bool haveExtArg = false;
+        pos += 1;
+
        if ((mod->majorVer() == 2 && opcode == Py2k::EXTENDED_ARG) ||
            (mod->majorVer() == 3 && opcode == Py3k::EXTENDED_ARG)) {
            operand = source.get16() << 16;
            opcode = source.getByte();
            haveExtArg = true;
+            pos += 3;
        }
        if (opcode >= HAVE_ARG) {
            // If we have an extended arg, we want to OR the lower part,
@@ -341,10 +378,9 @@ void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent)
                operand |= (source.get16() & 0xFFFF);
            else
                operand = source.get16();
+            pos += 2;
        }

-        for (int i=0; i<indent; i++)
-            printf("    ");
        if (mod->majorVer() == 1) {
            printf("%-24s", Py1k::OpcodeNames[opcode]);
        } else if (mod->majorVer() == 2) {
@@ -359,7 +395,7 @@ void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent)
                printf("%d: ", operand);
                print_const(code->getConst(operand), mod);
            } else if ((mod->majorVer() == 1 && Py1k::IsNameArg(opcode)) ||
-                       (mod->majorVer() == 1 && mod->minorVer() < 4 && Py1k::IsVarNameArg(opcode)) ||
+                       (mod->majorVer() == 1 && mod->minorVer() < 3 && Py1k::IsVarNameArg(opcode)) ||
                       (mod->majorVer() == 2 && Py2k::IsNameArg(opcode)) ||
                       (mod->majorVer() == 3 && Py3k::IsNameArg(opcode))) {
                printf("%d: %s", operand, code->getName(operand)->value());
--- a/code.h
+++ b/code.h
@@ -10,9 +10,6 @@ public:
        : PycObject(type), m_argCount(0), m_kwOnlyArgCount(0), m_numLocals(0),
          m_stackSize(0), m_flags(0), m_firstLine(0) { }

-    bool isType(int type) const
-    { return (type == TYPE_CODE) || (type == TYPE_CODE2) || PycObject::isType(type); }
-
    void load(class PycData* stream, class PycModule* mod);

    int argCount() const { return m_argCount; }
--- a/data.cpp
+++ b/data.cpp
@@ -23,6 +23,20 @@ int PycData::get32()
                );
 }

+Pyc_INT64 PycData::get64()
+{
+    /* Read eight bytes, least-significant first.  Each byte is fetched in
+     * its own statement: the operands of one '|' expression may be
+     * evaluated in any order, which would leave the byte order up to the
+     * compiler.  Accumulating unsigned also keeps the shift into bit 63
+     * well defined. */
+    unsigned long long bits = 0;
+    for (int shift = 0; shift < 64; shift += 8)
+        bits |= (unsigned long long)(getByte() & 0xFF) << shift;
+
+    return (Pyc_INT64)bits;
+}
+

 /* PycFile */
 PycFile::PycFile(const char* filename)
--- a/data.h
+++ b/data.h
@@ -3,6 +3,12 @@

 #include <cstdio>

+#ifdef WIN32                   /* 64-bit integer type used by PycData::get64() */
+typedef __int64 Pyc_INT64;
+#else                          /* every non-WIN32 build uses long long */
+typedef long long Pyc_INT64;
+#endif
+
 class PycData {
 public:
    PycData() { }
@@ -15,6 +21,7 @@ public:
    virtual int getBuffer(int bytes, void* buffer) = 0;
    int get16();
    int get32();
+    Pyc_INT64 get64();
 };

 class PycFile : public PycData {
--- a/numeric.cpp
+++ b/numeric.cpp
@@ -13,14 +13,27 @@ void PycInt::load(PycData* stream, PycModule*)
 /* PycLong */
 void PycLong::load(PycData* stream, PycModule*)
 {
-    m_size = stream->get32();
-    int actualSize = m_size & 0x7FFFFFFF;
-    for (int i=0; i<actualSize; i++)
-        m_value.push_back(stream->get16());
+    if (type() == TYPE_INT64) {     // fixed-width form: two 32-bit halves, low half first
+        int lo = stream->get32();
+        int hi = stream->get32();
+        m_value.push_back((lo      ) & 0xFFFF);
+        m_value.push_back((lo >> 16) & 0xFFFF);
+        m_value.push_back((hi      ) & 0xFFFF);
+        m_value.push_back((hi >> 16) & 0xFFFF);
+        m_size = (hi & 0x80000000) != 0 ? -4 : 4;   // NOTE(review): digits stay two's-complement, not a magnitude - confirm consumers
+    } else {
+        m_size = stream->get32();   // sign of the count carries the sign of the value
+        int actualSize = (m_size < 0) ? -m_size : m_size;   // digit count is |m_size|; masking the sign bit gave ~2^31 for negatives
+        for (int i=0; i<actualSize; i++)
+            m_value.push_back(stream->get16());
+    }
 }

 bool PycLong::isEqual(PycRef<PycObject> obj) const
 {
+    if (type() != obj->type())
+        return false;
+
    PycRef<PycLong> longObj = obj.cast<PycLong>();
    if (m_size != longObj->m_size)
        return false;
@@ -51,8 +64,54 @@ void PycFloat::load(PycData* stream, PycModule*)

 bool PycFloat::isEqual(PycRef<PycObject> obj) const
 {
+    if (type() != obj->type())
+        return false;
+
    PycRef<PycFloat> floatObj = obj.cast<PycFloat>();
    if (m_value == floatObj->m_value)
        return true;
    return (strcmp(m_value, floatObj->m_value) == 0);
 }
+
+
+/* PycComplex */
+void PycComplex::load(PycData* stream, PycModule* mod)
+{
+    PycFloat::load(stream, mod);    // base-class load runs first (presumably the real part's text - confirm)
+
+    int len = stream->getByte();    // one-byte length prefix of the imaginary string
+    if (m_imag) delete[] m_imag;    // drop any buffer left from an earlier load
+    if (len > 0) {
+        m_imag = new char[len+1];   // +1 leaves room for the terminator
+        stream->getBuffer(len, m_imag);
+        m_imag[len] = 0;            // NUL-terminate so it can be printed with %s
+    } else {
+        m_imag = 0;                 // nothing stored; imag() will return a null pointer
+    }
+}
+
+bool PycComplex::isEqual(PycRef<PycObject> obj) const
+{
+    if (!PycFloat::isEqual(obj))    // also rejects objects of a different type
+        return false;
+
+    const char* other = obj.cast<PycComplex>()->m_imag;
+    if (m_imag == other)            // same buffer, or both absent
+        return true;
+    return m_imag && other && (strcmp(m_imag, other) == 0);  // never hand strcmp a null pointer
+}
+
+/* PycCFloat: the stream holds the raw 8-byte IEEE 754 image of the double */
+void PycCFloat::load(PycData* stream, PycModule*)
+{
+    Pyc_INT64 bits = stream->get64();           // bit pattern, not a numeric value
+    memcpy(&m_value, &bits, sizeof(m_value));   // reinterpret the bits; a cast would convert the integer
+}
+
+/* PycCComplex: real part, then imaginary part, each a raw 8-byte double */
+void PycCComplex::load(PycData* stream, PycModule* mod)
+{
+    PycCFloat::load(stream, mod);               // real part goes into the base class
+    Pyc_INT64 bits = stream->get64();           // bit pattern of the imaginary part
+    memcpy(&m_imag, &bits, sizeof(m_imag));     // reinterpret the bits; a cast would convert the integer
+}
--- a/numeric.h
+++ b/numeric.h
@@ -9,11 +9,11 @@ public:
    PycInt(int value = 0, int type = TYPE_INT)
        : PycObject(type), m_value(value) { }

-    bool isType(int type) const
-    { return (type == TYPE_INT) || PycObject::isType(type); }
-
    bool isEqual(PycRef<PycObject> obj) const
-    { return m_value == obj.cast<PycInt>()->m_value; }
+    {
+        return (type() == obj->type()) &&
+               (m_value == obj.cast<PycInt>()->m_value);
+    }

    void load(class PycData* stream, class PycModule* mod);

@@ -28,9 +28,6 @@ public:
    PycLong(int type = TYPE_LONG)
        : PycObject(type), m_size(0) { }

-    bool isType(int type) const
-    { return (type == TYPE_LONG) || PycObject::isType(type); }
-
    bool isEqual(PycRef<PycObject> obj) const;

    void load(class PycData* stream, class PycModule* mod);
@@ -50,9 +47,6 @@ public:

    ~PycFloat() { if (m_value) delete[] m_value; }

-    bool isType(int type) const
-    { return (type == TYPE_FLOAT) || PycObject::isType(type); }
-
    bool isEqual(PycRef<PycObject> obj) const;

    void load(class PycData* stream, class PycModule* mod);
@@ -63,4 +57,59 @@ private:
    char* m_value;  // Floats are stored as strings
 };

+class PycComplex : public PycFloat {   // PycFloat plus a second, separately owned string
+public:
+    PycComplex(int type = TYPE_COMPLEX)
+        : PycFloat(type), m_imag(0) { }    // starts with no imaginary string
+
+    ~PycComplex() { if (m_imag) delete[] m_imag; }  // releases the buffer allocated by load()
+
+    bool isEqual(PycRef<PycObject> obj) const;      // base comparison, then the imaginary strings
+
+    void load(class PycData* stream, class PycModule* mod);
+
+    const char* imag() const { return m_imag; }     // null when load() read a zero length
+
+private:
+    char* m_imag;   // imaginary part kept as NUL-terminated text; owned by this object
+};
+
+class PycCFloat : public PycObject {   // float held as a native double rather than as text
+public:
+    PycCFloat(int type = TYPE_BINARY_FLOAT)
+        : PycObject(type), m_value(0.0) { }
+
+    bool isEqual(PycRef<PycObject> obj) const
+    {
+        return (type() == obj->type()) &&       // type check first, so the cast below is only reached on a match
+               (m_value == obj.cast<PycCFloat>()->m_value);    // exact comparison of the doubles
+    }
+
+    void load(class PycData* stream, class PycModule* mod);    // fills m_value from the stream
+
+    double value() const { return m_value; }
+
+private:
+    double m_value;     // for PycCComplex this is the real part
+};
+
+class PycCComplex : public PycCFloat { // adds an imaginary double to PycCFloat's value
+public:
+    PycCComplex(int type = TYPE_BINARY_COMPLEX)
+        : PycCFloat(type), m_imag(0.0) { }
+
+    bool isEqual(PycRef<PycObject> obj) const
+    {
+        return (PycCFloat::isEqual(obj)) &&     // base call checks the type and the real part
+               (m_imag == obj.cast<PycCComplex>()->m_imag);    // exact comparison of the doubles
+    }
+
+    void load(class PycData* stream, class PycModule* mod);    // base load, then m_imag
+
+    double imag() const { return m_imag; }
+
+private:
+    double m_imag;      // imaginary component
+};
+
 #endif
--- a/object.cpp
+++ b/object.cpp
@@ -30,16 +30,16 @@ PycRef<PycObject> CreateObject(int type)
        return Pyc_Ellipsis;
    case PycObject::TYPE_INT:
        return new PycInt();
-    //case PycObject::TYPE_INT64:
-    //    ...
+    case PycObject::TYPE_INT64:
+        return new PycLong(PycObject::TYPE_INT64);
    case PycObject::TYPE_FLOAT:
        return new PycFloat();
-    //case PycObject::TYPE_BINARY_FLOAT:
-    //    ...
-    //case PycObject::TYPE_COMPLEX:
-    //    ...
-    //case PycObject::TYPE_BINARY_COMPLEX:
-    //    ...
+    case PycObject::TYPE_BINARY_FLOAT:
+        return new PycCFloat();
+    case PycObject::TYPE_COMPLEX:
+        return new PycComplex();
+    case PycObject::TYPE_BINARY_COMPLEX:
+        return new PycCComplex();
    case PycObject::TYPE_LONG:
        return new PycLong();
    case PycObject::TYPE_STRING:
@@ -58,11 +58,11 @@ PycRef<PycObject> CreateObject(int type)
    case PycObject::TYPE_CODE2:
        return new PycCode();
    case PycObject::TYPE_UNICODE:
-        return new PycUnicode();
-    //case PycObject::TYPE_SET:
-    //    ...
-    //case PycObject::TYPE_FROZENSET:
-    //    ...
+        return new PycString(PycObject::TYPE_UNICODE);
+    case PycObject::TYPE_SET:
+        return new PycSet();
+    case PycObject::TYPE_FROZENSET:
+        return new PycSet(PycObject::TYPE_FROZENSET);
    default:
        fprintf(stderr, "CreateObject: Got unsupported type 0x%X\n", type);
        return (PycObject*)0;
--- a/object.h
+++ b/object.h
@@ -80,9 +80,6 @@ public:

    int type() const { return (this) ? m_type : TYPE_NULL; }

-    virtual bool isType(int type) const
-    { return (this) ? type == m_type : type == TYPE_NULL; }
-
    virtual bool isEqual(PycRef<PycObject> obj) const
    { return (this == (PycObject*)obj); }

--- a/pycdas.cpp
+++ b/pycdas.cpp
@@ -71,17 +71,20 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
    case PycObject::TYPE_STRING:
    case PycObject::TYPE_STRINGREF:
    case PycObject::TYPE_INTERNED:
-        iprintf(indent, "\"");
-        OutputString(obj.cast<PycString>(), QS_Double);
-        printf("\"\n");
+        if (mod->majorVer() == 3)
+            iprintf(indent, "b'");
+        else
+            iprintf(indent, "'");
+        OutputString(obj.cast<PycString>(), QS_Single);
+        printf("'\n");
        break;
    case PycObject::TYPE_UNICODE:
        if (mod->majorVer() == 3)
-            iprintf(indent, "\"");
+            iprintf(indent, "'");
        else
-            iprintf(indent, "u\"");
-        OutputString(obj.cast<PycString>(), QS_Double);
-        printf("\"\n");
+            iprintf(indent, "u'");
+        OutputString(obj.cast<PycString>(), QS_Single);
+        printf("'\n");
        break;
    case PycObject::TYPE_TUPLE:
        {
@@ -116,6 +119,15 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
            iprintf(indent, "}\n");
        }
        break;
+    case PycObject::TYPE_SET:
+        {
+            iprintf(indent, "{\n");
+            PycSet::value_t values = obj.cast<PycSet>()->values();
+            for (PycSet::value_t::iterator i = values.begin(); i != values.end(); i++)
+                output_object(*i, mod, indent + 1);
+            iprintf(indent, "}\n");
+        }
+        break;
    case PycObject::TYPE_NONE:
        iprintf(indent, "None\n");
        break;
@@ -131,6 +143,17 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
    case PycObject::TYPE_FLOAT:
        iprintf(indent, "%s\n", obj.cast<PycFloat>()->value());
        break;
+    case PycObject::TYPE_COMPLEX:
+        iprintf(indent, "(%s+%sj)\n", obj.cast<PycComplex>()->value(),
+                                      obj.cast<PycComplex>()->imag());
+        break;
+    case PycObject::TYPE_BINARY_FLOAT:
+        iprintf(indent, "%g\n", obj.cast<PycCFloat>()->value());
+        break;
+    case PycObject::TYPE_BINARY_COMPLEX:
+        iprintf(indent, "(%g+%gj)\n", obj.cast<PycCComplex>()->value(),
+                                      obj.cast<PycCComplex>()->imag());
+        break;
    default:
        iprintf(indent, "<TYPE: %d>\n", obj->type());
    }
@@ -146,7 +169,7 @@ int main(int argc, char* argv[])
    PycModule mod;
    mod.loadFromFile(argv[1]);
    printf("%s (Python %d.%d%s)\n", argv[1], mod.majorVer(), mod.minorVer(),
-           mod.isUnicode() ? " -U" : "");
+           (mod.majorVer() < 3 && mod.isUnicode()) ? " -U" : "");
    output_object(mod.code().cast<PycObject>(), &mod, 0);

    return 0;
--- a/sequence.cpp
+++ b/sequence.cpp
@@ -13,6 +13,9 @@ void PycTuple::load(PycData* stream, PycModule* mod)

 bool PycTuple::isEqual(PycRef<PycObject> obj) const
 {
+    if (type() != obj->type())
+        return false;
+
    PycRef<PycTuple> tupleObj = obj.cast<PycTuple>();
    if (m_size != tupleObj->m_size)
        return false;
@@ -37,6 +40,9 @@ void PycList::load(PycData* stream, PycModule* mod)

 bool PycList::isEqual(PycRef<PycObject> obj) const
 {
+    if (type() != obj->type())
+        return false;
+
    PycRef<PycList> listObj = obj.cast<PycList>();
    if (m_size != listObj->m_size)
        return false;
@@ -67,6 +73,9 @@ void PycDict::load(PycData* stream, PycModule* mod)

 bool PycDict::isEqual(PycRef<PycObject> obj) const
 {
+    if (type() != obj->type())
+        return false;
+
    PycRef<PycDict> dictObj = obj.cast<PycDict>();
    if (m_size != dictObj->m_size)
        return false;
@@ -100,3 +109,30 @@ PycRef<PycObject> PycDict::get(PycRef<PycObject> key) const
    }
    return Pyc_NULL; // Disassembly shouldn't get non-existant keys
 }
+
+
+/* PycSet */
+void PycSet::load(PycData* stream, PycModule* mod)
+{
+    m_size = stream->get32();       // element count precedes the elements
+    for (int i=0; i<m_size; i++)    // runs zero times if the count is not positive
+        m_values.insert(LoadObject(stream, mod));   // std::set insert: m_values may end up smaller than m_size
+}
+
+bool PycSet::isEqual(PycRef<PycObject> obj) const
+{
+    if (type() != obj->type())
+        return false;
+
+    PycRef<PycSet> setObj = obj.cast<PycSet>();
+    // m_size is the count read from the stream; insert() may have collapsed
+    // duplicates, so the containers themselves must also agree in size.
+    if (m_size != setObj->m_size || m_values.size() != setObj->m_values.size())
+        return false;
+    // NOTE(review): pairs elements in container order; confirm PycRef ordering.
+    value_t::const_iterator it2 = setObj->m_values.begin();
+    for (value_t::const_iterator it1 = m_values.begin(); it1 != m_values.end(); ++it1, ++it2)
+        if (!(*it1)->isEqual(*it2))
+            return false;
+    return true;
+}
--- a/sequence.h
+++ b/sequence.h
@@ -4,6 +4,7 @@
 #include "object.h"
 #include <vector>
 #include <list>
+#include <set>

 class PycSequence : public PycObject {
 public:
@@ -22,9 +23,6 @@ public:

    PycTuple(int type = TYPE_TUPLE) : PycSequence(type) { }

-    bool isType(int type) const
-    { return (type == TYPE_TUPLE) || PycObject::isType(type); }
-
    bool isEqual(PycRef<PycObject> obj) const;

    void load(class PycData* stream, class PycModule* mod);
@@ -42,9 +40,6 @@ public:

    PycList(int type = TYPE_LIST) : PycSequence(type) { }

-    bool isType(int type) const
-    { return (type == TYPE_LIST) || PycObject::isType(type); }
-
    bool isEqual(PycRef<PycObject> obj) const;

    void load(class PycData* stream, class PycModule* mod);
@@ -68,9 +63,6 @@ public:

    PycDict(int type = TYPE_DICT) : PycSequence(type) { }

-    bool isType(int type) const
-    { return (type == TYPE_DICT) || PycObject::isType(type); }
-
    bool isEqual(PycRef<PycObject> obj) const;

    void load(class PycData* stream, class PycModule* mod);
@@ -91,4 +83,26 @@ private:
    value_t m_values;
 };

+class PycSet : public PycSequence {    // created for both TYPE_SET and TYPE_FROZENSET
+public:
+    typedef std::set<PycRef<PycObject> > value_t;  // ordered by whatever comparison PycRef provides
+
+    PycSet(int type = TYPE_SET) : PycSequence(type) { }
+
+    bool isEqual(PycRef<PycObject> obj) const;
+
+    void load(class PycData* stream, class PycModule* mod);
+
+    value_t values() const { return m_values; }    // returns a copy of the whole set
+    PycRef<PycObject> get(int idx) const           // idx-th element in container order
+    {
+        value_t::const_iterator it = m_values.begin();
+        for (int i=0; i<idx; i++) ++it;            // linear walk; idx is not range-checked
+        return *it;
+    }
+
+private:
+    value_t m_values;  // elements inserted by load()
+};
+
 #endif
--- a/string.cpp
+++ b/string.cpp
@@ -35,6 +35,9 @@ void PycString::load(PycData* stream, PycModule* mod)

 bool PycString::isEqual(PycRef<PycObject> obj) const
 {
+    if (type() != obj->type())
+        return false;
+
    PycRef<PycString> strObj = obj.cast<PycString>();
    if (m_value == strObj->m_value)
        return true;
--- a/string.h
+++ b/string.h
@@ -15,12 +15,6 @@ public:

    ~PycString() { if (m_value) delete[] m_value; }

-    bool isType(int type) const
-    {
-        return (type == TYPE_STRING) || (type == TYPE_INTERNED) ||
-               (type == TYPE_STRINGREF) || PycObject::isType(type);
-    }
-
    bool isEqual(PycRef<PycObject> obj) const;

    void load(class PycData* stream, class PycModule* mod);
@@ -33,16 +27,6 @@ private:
    int m_length;
 };

-class PycUnicode : public PycString {
-public:
-    PycUnicode(int type = TYPE_UNICODE) : PycString(type) { }
-
-    bool isType(int type) const
-    {
-        return (type == TYPE_UNICODE) || PycString::isType(type);
-    }
-};
-
 void OutputString(PycRef<PycString> str, QuoteStyle style, FILE* F = stdout);

 #endif