diff --git a/ASTree.cpp b/ASTree.cpp index eacc42d..2b6584c 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1223,7 +1223,8 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) { PycRef t_ob = new ASTObject(code->getConst(operand)); - if (t_ob->object()->type() == PycObject::TYPE_TUPLE && + if ((t_ob->object()->type() == PycObject::TYPE_TUPLE || + t_ob->object()->type() == PycObject::TYPE_SMALL_TUPLE) && !t_ob->object().cast()->values().size()) { ASTTuple::value_t values; stack.push(new ASTTuple(values)); @@ -2618,7 +2619,8 @@ void print_src(PycRef node, PycModule* mod) else print_src(import->name(), mod); fprintf(pyc_output, " import "); - if (fromlist->type() == PycObject::TYPE_TUPLE) { + if (fromlist->type() == PycObject::TYPE_TUPLE || + fromlist->type() == PycObject::TYPE_SMALL_TUPLE) { bool first = true; PycTuple::value_t::const_iterator ii = fromlist.cast()->values().begin(); for (; ii != fromlist.cast()->values().end(); ++ii) { @@ -2656,6 +2658,10 @@ void print_src(PycRef node, PycModule* mod) PycRef obj = src.cast()->object(); if (obj->type() == PycObject::TYPE_STRING || obj->type() == PycObject::TYPE_INTERNED || + obj->type() == PycObject::TYPE_ASCII || + obj->type() == PycObject::TYPE_ASCII_INTERNED || + obj->type() == PycObject::TYPE_SHORT_ASCII || + obj->type() == PycObject::TYPE_SHORT_ASCII_INTERNED || obj->type() == PycObject::TYPE_STRINGREF) OutputString(obj.cast(), (mod->majorVer() == 3) ? 'b' : 0, true); else if (obj->type() == PycObject::TYPE_UNICODE) diff --git a/bytecode.cpp b/bytecode.cpp index 0217819..00cf24a 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -160,12 +160,17 @@ void print_const(PycRef obj, PycModule* mod) case PycObject::TYPE_STRING: case PycObject::TYPE_STRINGREF: case PycObject::TYPE_INTERNED: + case PycObject::TYPE_ASCII: + case PycObject::TYPE_ASCII_INTERNED: + case PycObject::TYPE_SHORT_ASCII: + case PycObject::TYPE_SHORT_ASCII_INTERNED: OutputString(obj.cast(), (mod->majorVer() == 3) ? 'b' : 0); break; case PycObject::TYPE_UNICODE: OutputString(obj.cast(), (mod->majorVer() == 3) ? 0 : 'u'); break; case PycObject::TYPE_TUPLE: + case PycObject::TYPE_SMALL_TUPLE: { fprintf(pyc_output, "("); PycTuple::value_t values = obj.cast()->values(); diff --git a/pyc_module.cpp b/pyc_module.cpp index b8439f2..12903ec 100644 --- a/pyc_module.cpp +++ b/pyc_module.cpp @@ -155,9 +155,18 @@ void PycModule::loadFromFile(const char* filename) m_code = LoadObject(&in, this).cast(); } -PycRef PycModule::getIntern(int ref) +PycRef PycModule::getIntern(int ref) const { std::list >::const_iterator it = m_interns.begin(); - for (int i=0; i PycModule::getRef(int ref) const +{ + std::list >::const_iterator it = m_refs.begin(); + while (ref--) + ++it; return *it; } diff --git a/pyc_module.h b/pyc_module.h index 0e8c803..abc384b 100644 --- a/pyc_module.h +++ b/pyc_module.h @@ -52,7 +52,10 @@ public: PycRef code() const { return m_code; } void intern(PycRef str) { m_interns.push_back(str); } - PycRef getIntern(int ref); + PycRef getIntern(int ref) const; + + void refObject(PycRef str) { m_refs.push_back(str); } + PycRef getRef(int ref) const; private: void setVersion(unsigned int magic); @@ -63,6 +66,7 @@ private: PycRef m_code; std::list > m_interns; + std::list > m_refs; }; #endif diff --git a/pyc_object.cpp b/pyc_object.cpp index 1492115..1c73c4d 100644 --- a/pyc_object.cpp +++ b/pyc_object.cpp @@ -30,7 +30,7 @@ PycRef CreateObject(int type) case PycObject::TYPE_INT: return new PycInt(); case PycObject::TYPE_INT64: - return new PycLong(PycObject::TYPE_INT64); + return new PycLong(type); case PycObject::TYPE_FLOAT: return new PycFloat(); case PycObject::TYPE_BINARY_FLOAT: @@ -42,13 +42,17 @@ PycRef CreateObject(int type) case PycObject::TYPE_LONG: return new PycLong(); case PycObject::TYPE_STRING: - return new PycString(); case PycObject::TYPE_INTERNED: - return new PycString(PycObject::TYPE_INTERNED); case PycObject::TYPE_STRINGREF: - return new PycString(PycObject::TYPE_STRINGREF); + case PycObject::TYPE_UNICODE: + case PycObject::TYPE_ASCII: + case PycObject::TYPE_ASCII_INTERNED: + case PycObject::TYPE_SHORT_ASCII: + case PycObject::TYPE_SHORT_ASCII_INTERNED: + return new PycString(type); case PycObject::TYPE_TUPLE: - return new PycTuple(); + case PycObject::TYPE_SMALL_TUPLE: + return new PycTuple(type); case PycObject::TYPE_LIST: return new PycList(); case PycObject::TYPE_DICT: @@ -56,12 +60,9 @@ PycRef CreateObject(int type) case PycObject::TYPE_CODE: case PycObject::TYPE_CODE2: return new PycCode(); - case PycObject::TYPE_UNICODE: - return new PycString(PycObject::TYPE_UNICODE); case PycObject::TYPE_SET: - return new PycSet(); case PycObject::TYPE_FROZENSET: - return new PycSet(PycObject::TYPE_FROZENSET); + return new PycSet(type); default: fprintf(stderr, "CreateObject: Got unsupported type 0x%X\n", type); return Pyc_NULL; @@ -70,8 +71,20 @@ PycRef CreateObject(int type) PycRef LoadObject(PycData* stream, PycModule* mod) { - PycRef obj = CreateObject(stream->getByte()); - if (obj != Pyc_NULL) - obj->load(stream, mod); + int type = stream->getByte(); + PycRef obj; + + if (type == PycObject::TYPE_OBREF) { + int index = stream->get32(); + obj = mod->getRef(index); + } else { + obj = CreateObject(type & 0x7F); + if (obj != Pyc_NULL) { + if (type & 0x80) + mod->refObject(obj); + obj->load(stream, mod); + } + } + return obj; } diff --git a/pyc_object.h b/pyc_object.h index 9c6c903..b2f9321 100644 --- a/pyc_object.h +++ b/pyc_object.h @@ -8,19 +8,19 @@ public: PycRef(_Obj* obj) : m_obj(obj) { - if(m_obj) + if (m_obj) m_obj->addRef(); } PycRef(const PycRef<_Obj>& obj) : m_obj(obj.m_obj) { - if(m_obj) + if (m_obj) m_obj->addRef(); } ~PycRef<_Obj>() { - if(m_obj) + if (m_obj) m_obj->delRef(); } @@ -62,6 +62,9 @@ private: }; +class PycData; +class PycModule; + /* Please only hold PycObjects inside PycRefs! */ class PycObject { public: @@ -83,6 +86,7 @@ public: TYPE_STRING = 's', TYPE_INTERNED = 't', TYPE_STRINGREF = 'R', + TYPE_OBREF = 'r', TYPE_TUPLE = '(', TYPE_LIST = '[', TYPE_DICT = '{', @@ -92,6 +96,11 @@ public: TYPE_UNKNOWN = '?', TYPE_SET = '<', TYPE_FROZENSET = '>', + TYPE_ASCII = 'a', + TYPE_ASCII_INTERNED = 'A', + TYPE_SMALL_TUPLE = ')', + TYPE_SHORT_ASCII = 'z', + TYPE_SHORT_ASCII_INTERNED = 'Z', }; PycObject(int type = TYPE_UNKNOWN) : m_refs(0), m_type(type) { } @@ -102,19 +111,19 @@ public: virtual bool isEqual(PycRef obj) const { return (this == (PycObject*)obj); } - virtual void load(class PycData*, class PycModule*) { } + virtual void load(PycData*, PycModule*) { } private: int m_refs; int m_type; public: - void addRef() { if (this) ++m_refs; } - void delRef() { if (this && --m_refs == 0) delete this; } + void addRef() { ++m_refs; } + void delRef() { if (--m_refs == 0) delete this; } }; PycRef CreateObject(int type); -PycRef LoadObject(class PycData* stream, class PycModule* mod); +PycRef LoadObject(PycData* stream, PycModule* mod); /* Static Singleton objects */ extern PycRef Pyc_NULL; diff --git a/pyc_sequence.cpp b/pyc_sequence.cpp index 2d638c9..13b7f84 100644 --- a/pyc_sequence.cpp +++ b/pyc_sequence.cpp @@ -5,7 +5,11 @@ /* PycTuple */ void PycTuple::load(PycData* stream, PycModule* mod) { - m_size = stream->get32(); + if (type() == TYPE_SMALL_TUPLE) + m_size = stream->getByte(); + else + m_size = stream->get32(); + m_values.resize(m_size); for (int i=0; i +static void ascii_to_utf8(char** data) +{ + size_t utf8len = 0, asciilen = 0; + unsigned char* cp = reinterpret_cast(*data); + while (*cp) { + if (*cp & 0x80) + utf8len += 2; + else + utf8len += 1; + + // Advance ASCII pointer + ++asciilen; + ++cp; + } + + if (asciilen == utf8len) { + // This can only happen if all characters are [0x00-0x7f]. + // If that happens, we don't need to do any conversion, nor + // reallocate any buffers. Woot! + return; + } + + char* utf8_buffer = new char[utf8len + 1]; + unsigned char* up = reinterpret_cast(utf8_buffer); + cp = reinterpret_cast(*data); + while (*cp) { + if (*cp & 0x80) { + *up++ = 0xC0 | ((*cp >> 6) & 0x1F); + *up++ = 0x80 | ((*cp ) & 0x3F); + } else { + *up++ = *cp; + } + ++cp; + } + + utf8_buffer[utf8len] = 0; + delete[] *data; + *data = utf8_buffer; +} + /* PycString */ void PycString::load(PycData* stream, PycModule* mod) { @@ -20,16 +60,25 @@ void PycString::load(PycData* stream, PycModule* mod) m_value = 0; } } else { - m_length = stream->get32(); + if (type() == TYPE_SHORT_ASCII || type() == TYPE_SHORT_ASCII_INTERNED) + m_length = stream->getByte(); + else + m_length = stream->get32(); + if (m_length) { m_value = new char[m_length+1]; stream->getBuffer(m_length, m_value); m_value[m_length] = 0; + + if (type() == TYPE_ASCII || type() == TYPE_ASCII_INTERNED || + type() == TYPE_SHORT_ASCII || type() == TYPE_SHORT_ASCII_INTERNED) + ascii_to_utf8(&m_value); } else { m_value = 0; } - if (type() == TYPE_INTERNED) + if (type() == TYPE_INTERNED || type() == TYPE_ASCII_INTERNED || + type() == TYPE_SHORT_ASCII_INTERNED) mod->intern(this); } } diff --git a/pycdas.cpp b/pycdas.cpp index cf1772d..e99c240 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -115,6 +115,10 @@ void output_object(PycRef obj, PycModule* mod, int indent) case PycObject::TYPE_STRING: case PycObject::TYPE_STRINGREF: case PycObject::TYPE_INTERNED: + case PycObject::TYPE_ASCII: + case PycObject::TYPE_ASCII_INTERNED: + case PycObject::TYPE_SHORT_ASCII: + case PycObject::TYPE_SHORT_ASCII_INTERNED: iprintf(indent, ""); OutputString(obj.cast(), (mod->majorVer() == 3) ? 'b' : 0); fprintf(pyc_output, "\n"); @@ -125,6 +129,7 @@ void output_object(PycRef obj, PycModule* mod, int indent) fprintf(pyc_output, "\n"); break; case PycObject::TYPE_TUPLE: + case PycObject::TYPE_SMALL_TUPLE: { iprintf(indent, "(\n"); PycTuple::value_t values = obj.cast()->values();