diff --git a/ASTree.cpp b/ASTree.cpp index 36f722f..8963f07 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "ASTree.h" #include "FastStack.h" #include "pyc_numeric.h" @@ -1296,8 +1297,12 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) break; } - stack = stack_hist.top(); - stack_hist.pop(); + if (!stack_hist.empty()) { + stack = stack_hist.top(); + stack_hist.pop(); + } else { + fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n"); + } PycRef prev = curblock; PycRef nil; @@ -1468,10 +1473,10 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } while (prev != nil); - curblock = blocks.top(); - - if (curblock->blktype() == ASTBlock::BLK_EXCEPT) { - curblock->setEnd(pos+offs); + if (!blocks.empty()) { + curblock = blocks.top(); + if (curblock->blktype() == ASTBlock::BLK_EXCEPT) + curblock->setEnd(pos+offs); } } break; @@ -1889,7 +1894,8 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) else curblock->append(new ASTPrint(stack.top(), stream)); stack.pop(); - stream->setProcessed(); + if (stream) + stream->setProcessed(); } break; case Pyc::PRINT_NEWLINE: @@ -1917,7 +1923,8 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) else curblock->append(new ASTPrint(nullptr, stream)); stack.pop(); - stream->setProcessed(); + if (stream) + stream->setProcessed(); } break; case Pyc::RAISE_VARARGS_A: @@ -1940,8 +1947,6 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); - - bc_next(source, mod, opcode, operand, pos); } } break; @@ -2938,6 +2943,8 @@ void print_formatted_value(PycRef formatted_value, PycModule* pyc_output << "}"; } +static std::unordered_set node_seen; + void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) { if (node == NULL) { @@ -2946,6 +2953,12 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) return; } + if (node_seen.find((ASTNode *)node) != node_seen.end()) { + fputs("WARNING: Circular reference detected\n", stderr); + return; + } + node_seen.insert((ASTNode *)node); + switch (node->type()) { case ASTNode::NODE_BINARY: case ASTNode::NODE_COMPARE: @@ -3607,10 +3620,12 @@ void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) pyc_output << "type() << ">"; fprintf(stderr, "Unsupported Node type: %d\n", node->type()); cleanBuild = false; + node_seen.erase((ASTNode *)node); return; } cleanBuild = true; + node_seen.erase((ASTNode *)node); } bool print_docstring(PycRef obj, int indent, PycModule* mod, @@ -3627,8 +3642,16 @@ bool print_docstring(PycRef obj, int indent, PycModule* mod, return false; } +static std::unordered_set code_seen; + void decompyle(PycRef code, PycModule* mod, std::ostream& pyc_output) { + if (code_seen.find((PycCode *)code) != code_seen.end()) { + fputs("WARNING: Circular reference detected\n", stderr); + return; + } + code_seen.insert((PycCode *)code); + PycRef source = BuildFromCode(code, mod); PycRef clean = source.cast(); @@ -3722,4 +3745,6 @@ void decompyle(PycRef code, PycModule* mod, std::ostream& pyc_output) start_line(cur_indent, pyc_output); pyc_output << "# WARNING: Decompyle incomplete\n"; } + + code_seen.erase((PycCode *)code); } diff --git a/bytecode.cpp b/bytecode.cpp index c6b7cba..6bee279 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -600,3 +600,18 @@ void bc_disasm(std::ostream& pyc_output, PycRef code, PycModule* mod, pyc_output << "\n"; } } + +void bc_exceptiontable(std::ostream& pyc_output, PycRef code, + int indent) +{ + for (const auto& entry : code->exceptionTableEntries()) { + + for (int i=0; i " << entry.target << " [" << entry.stack_depth + << "] " << (entry.push_lasti ? "lasti": "") + << "\n"; + } +} diff --git a/bytecode.h b/bytecode.h index 7e4179e..3c0d9d3 100644 --- a/bytecode.h +++ b/bytecode.h @@ -32,3 +32,5 @@ void print_const(std::ostream& pyc_output, PycRef obj, PycModule* mod void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos); void bc_disasm(std::ostream& pyc_output, PycRef code, PycModule* mod, int indent, unsigned flags); +void bc_exceptiontable(std::ostream& pyc_output, PycRef code, + int indent); diff --git a/data.cpp b/data.cpp index 1be5aa6..2b560a7 100644 --- a/data.cpp +++ b/data.cpp @@ -53,35 +53,43 @@ bool PycFile::atEof() const int PycFile::getByte() { int ch = fgetc(m_stream); - if (ch == EOF) - ungetc(ch, m_stream); + if (ch == EOF) { + fputs("PycFile::getByte(): Unexpected end of stream\n", stderr); + std::exit(1); + } return ch; } -int PycFile::getBuffer(int bytes, void* buffer) +void PycFile::getBuffer(int bytes, void* buffer) { - return (int)fread(buffer, 1, bytes, m_stream); + if (fread(buffer, 1, bytes, m_stream) != (size_t)bytes) { + fputs("PycFile::getBuffer(): Unexpected end of stream\n", stderr); + std::exit(1); + } } /* PycBuffer */ int PycBuffer::getByte() { - if (atEof()) - return EOF; + if (atEof()) { + fputs("PycBuffer::getByte(): Unexpected end of stream\n", stderr); + std::exit(1); + } int ch = (int)(*(m_buffer + m_pos)); ++m_pos; return ch & 0xFF; // Make sure it's just a byte! } -int PycBuffer::getBuffer(int bytes, void* buffer) +void PycBuffer::getBuffer(int bytes, void* buffer) { - if (m_pos + bytes > m_size) - bytes = m_size - m_pos; + if (m_pos + bytes > m_size) { + fputs("PycBuffer::getBuffer(): Unexpected end of stream\n", stderr); + std::exit(1); + } if (bytes != 0) memcpy(buffer, (m_buffer + m_pos), bytes); m_pos += bytes; - return bytes; } int formatted_print(std::ostream& stream, const char* format, ...) diff --git a/data.h b/data.h index 376d318..28cc85e 100644 --- a/data.h +++ b/data.h @@ -19,7 +19,7 @@ public: virtual bool atEof() const = 0; virtual int getByte() = 0; - virtual int getBuffer(int bytes, void* buffer) = 0; + virtual void getBuffer(int bytes, void* buffer) = 0; int get16(); int get32(); Pyc_INT64 get64(); @@ -34,7 +34,7 @@ public: bool atEof() const override; int getByte() override; - int getBuffer(int bytes, void* buffer) override; + void getBuffer(int bytes, void* buffer) override; private: FILE* m_stream; @@ -50,7 +50,7 @@ public: bool atEof() const override { return (m_pos == m_size); } int getByte() override; - int getBuffer(int bytes, void* buffer) override; + void getBuffer(int bytes, void* buffer) override; private: const unsigned char* m_buffer; diff --git a/pyc_code.cpp b/pyc_code.cpp index e080bb2..2716abf 100644 --- a/pyc_code.cpp +++ b/pyc_code.cpp @@ -128,3 +128,44 @@ PycRef PycCode::getCellVar(PycModule* mod, int idx) const ? m_freeVars->get(idx - m_cellVars->size()).cast() : m_cellVars->get(idx).cast(); } + +int _parse_varint(PycBuffer& data, int& pos) { + int b = data.getByte(); + pos += 1; + + int val = b & 0x3F; + while (b & 0x40) { + val <<= 6; + + b = data.getByte(); + pos += 1; + + val |= (b & 0x3F); + } + return val; +} + +std::vector PycCode::exceptionTableEntries() const +{ + PycBuffer data(m_exceptTable->value(), m_exceptTable->length()); + + std::vector entries; + + int pos = 0; + while (!data.atEof()) { + + int start = _parse_varint(data, pos) * 2; + int length = _parse_varint(data, pos) * 2; + int end = start + length; + + int target = _parse_varint(data, pos) * 2; + int dl = _parse_varint(data, pos); + + int depth = dl >> 1; + bool lasti = bool(dl & 1); + + entries.push_back(PycExceptionTableEntry(start, end, target, depth, lasti)); + } + + return entries; +} diff --git a/pyc_code.h b/pyc_code.h index e6b2ce9..6485729 100644 --- a/pyc_code.h +++ b/pyc_code.h @@ -8,6 +8,18 @@ class PycData; class PycModule; +class PycExceptionTableEntry { +public: + int start_offset; // inclusive + int end_offset; // exclusive + int target; + int stack_depth; + bool push_lasti; + + PycExceptionTableEntry(int m_start_offset, int m_end_offset, int m_target, int m_stack_depth, bool m_push_lasti) : + start_offset(m_start_offset), end_offset(m_end_offset), target(m_target), stack_depth(m_stack_depth), push_lasti(m_push_lasti) {}; +}; + class PycCode : public PycObject { public: typedef std::vector> globals_t; @@ -87,6 +99,8 @@ public: m_globalsUsed.emplace_back(std::move(varname)); } + std::vector exceptionTableEntries() const; + private: int m_argCount, m_posOnlyArgCount, m_kwOnlyArgCount, m_numLocals; int m_stackSize, m_flags; diff --git a/pycdas.cpp b/pycdas.cpp index 8ecf2dc..aecb165 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "pyc_module.h" #include "pyc_numeric.h" #include "bytecode.h" @@ -74,6 +75,8 @@ static void iprintf(std::ostream& pyc_output, int indent, const char* fmt, ...) va_end(varargs); } +static std::unordered_set out_seen; + void output_object(PycRef obj, PycModule* mod, int indent, unsigned flags, std::ostream& pyc_output) { @@ -82,6 +85,12 @@ void output_object(PycRef obj, PycModule* mod, int indent, return; } + if (out_seen.find((PycObject *)obj) != out_seen.end()) { + fputs("WARNING: Circular reference detected\n", stderr); + return; + } + out_seen.insert((PycObject *)obj); + switch (obj->type()) { case PycObject::TYPE_CODE: case PycObject::TYPE_CODE2: @@ -146,16 +155,16 @@ void output_object(PycRef obj, PycModule* mod, int indent, iputs(pyc_output, indent + 1, "[Disassembly]\n"); bc_disasm(pyc_output, codeObj, mod, indent + 2, flags); + if (mod->verCompare(3, 11) >= 0) { + iputs(pyc_output, indent + 1, "[Exception Table]\n"); + bc_exceptiontable(pyc_output, codeObj, indent+2); + } + if (mod->verCompare(1, 5) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) { iprintf(pyc_output, indent + 1, "First Line: %d\n", codeObj->firstLine()); iputs(pyc_output, indent + 1, "[Line Number Table]\n"); output_object(codeObj->lnTable().cast(), mod, indent + 2, flags, pyc_output); } - - if (mod->verCompare(3, 11) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) { - iputs(pyc_output, indent + 1, "[Exception Table]\n"); - output_object(codeObj->exceptTable().cast(), mod, indent + 2, flags, pyc_output); - } } break; case PycObject::TYPE_STRING: @@ -247,6 +256,8 @@ void output_object(PycRef obj, PycModule* mod, int indent, default: iprintf(pyc_output, indent, "\n", obj->type()); } + + out_seen.erase((PycObject *)obj); } int main(int argc, char* argv[]) diff --git a/tests/compiled/test_raise_varargs.3.12.pyc b/tests/compiled/test_raise_varargs.3.12.pyc new file mode 100644 index 0000000..0f5a2c9 Binary files /dev/null and b/tests/compiled/test_raise_varargs.3.12.pyc differ diff --git a/tests/input/test_raise_varargs.py b/tests/input/test_raise_varargs.py new file mode 100644 index 0000000..d0cf568 --- /dev/null +++ b/tests/input/test_raise_varargs.py @@ -0,0 +1,7 @@ +import struct + +def bytes_to_words(b): + '''Convert a byte string (little-endian) to a list of 32-bit words.''' + if len(b) % 4 != 0: + raise ValueError('Input bytes length must be a multiple of 4 for word conversion.') + return struct.unpack('<' + 'I' * (len(b) // 4), b) diff --git a/tests/tokenized/test_raise_varargs.txt b/tests/tokenized/test_raise_varargs.txt new file mode 100644 index 0000000..1ef0b67 --- /dev/null +++ b/tests/tokenized/test_raise_varargs.txt @@ -0,0 +1,9 @@ +import struct +def bytes_to_words ( b ) : + +'Convert a byte string (little-endian) to a list of 32-bit words.' +if len ( b ) % 4 != 0 : + +raise ValueError ( 'Input bytes length must be a multiple of 4 for word conversion.' ) + +return struct . unpack ( '<' + 'I' * ( len ( b ) // 4 ) , b )