Compare commits

7 Commits

Author SHA1 Message Date
Michael Hansen
a05ddec0d8 Merge pull request #564 from whoami730/exceptions
Parse and display exception table
2025-08-30 10:02:14 -07:00
Sahil Jain
d8c6fdf711 Address comments 2025-08-30 20:01:32 +05:30
Michael Hansen
577720302e Add basic protection against circular references in pycdas and pycdc.
This fixes the last case of fuzzer errors detected by #572.
2025-08-28 16:42:03 -07:00
Michael Hansen
38799f5cfb Also check EOF in getBuffer() 2025-08-28 15:58:28 -07:00
Michael Hansen
0e7be40367 Add some extra guards against null dereference and empty std::stack pops
Fixes segfault cases of #572
2025-08-28 15:36:35 -07:00
Michael Hansen
ff0c1450b4 Abort immediately when attempting to read past end of stream.
No consumers of readByte() were actually checking for EOF, so they would
all keep re-reading the same byte over and over again, potentially until the
process runs out of memory (ref #572).
2025-08-28 15:14:55 -07:00
Sahil Jain
e8e10f1419 Parse exception table 2025-07-15 22:47:02 +05:30
8 changed files with 144 additions and 26 deletions

View File

@@ -1,6 +1,7 @@
#include <cstring> #include <cstring>
#include <cstdint> #include <cstdint>
#include <stdexcept> #include <stdexcept>
#include <unordered_set>
#include "ASTree.h" #include "ASTree.h"
#include "FastStack.h" #include "FastStack.h"
#include "pyc_numeric.h" #include "pyc_numeric.h"
@@ -1231,8 +1232,12 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
break; break;
} }
stack = stack_hist.top(); if (!stack_hist.empty()) {
stack_hist.pop(); stack = stack_hist.top();
stack_hist.pop();
} else {
fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n");
}
PycRef<ASTBlock> prev = curblock; PycRef<ASTBlock> prev = curblock;
PycRef<ASTBlock> nil; PycRef<ASTBlock> nil;
@@ -1389,10 +1394,10 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
} while (prev != nil); } while (prev != nil);
curblock = blocks.top(); if (!blocks.empty()) {
curblock = blocks.top();
if (curblock->blktype() == ASTBlock::BLK_EXCEPT) { if (curblock->blktype() == ASTBlock::BLK_EXCEPT)
curblock->setEnd(pos+offs); curblock->setEnd(pos+offs);
} }
} }
break; break;
@@ -1769,7 +1774,8 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
else else
curblock->append(new ASTPrint(stack.top(), stream)); curblock->append(new ASTPrint(stack.top(), stream));
stack.pop(); stack.pop();
stream->setProcessed(); if (stream)
stream->setProcessed();
} }
break; break;
case Pyc::PRINT_NEWLINE: case Pyc::PRINT_NEWLINE:
@@ -1797,7 +1803,8 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
else else
curblock->append(new ASTPrint(nullptr, stream)); curblock->append(new ASTPrint(nullptr, stream));
stack.pop(); stack.pop();
stream->setProcessed(); if (stream)
stream->setProcessed();
} }
break; break;
case Pyc::RAISE_VARARGS_A: case Pyc::RAISE_VARARGS_A:
@@ -2773,6 +2780,8 @@ void print_formatted_value(PycRef<ASTFormattedValue> formatted_value, PycModule*
pyc_output << "}"; pyc_output << "}";
} }
static std::unordered_set<ASTNode *> node_seen;
void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output) void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
{ {
if (node == NULL) { if (node == NULL) {
@@ -2781,6 +2790,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
return; return;
} }
if (node_seen.find((ASTNode *)node) != node_seen.end()) {
fputs("WARNING: Circular reference detected\n", stderr);
return;
}
node_seen.insert((ASTNode *)node);
switch (node->type()) { switch (node->type()) {
case ASTNode::NODE_BINARY: case ASTNode::NODE_BINARY:
case ASTNode::NODE_COMPARE: case ASTNode::NODE_COMPARE:
@@ -3436,10 +3451,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
pyc_output << "<NODE:" << node->type() << ">"; pyc_output << "<NODE:" << node->type() << ">";
fprintf(stderr, "Unsupported Node type: %d\n", node->type()); fprintf(stderr, "Unsupported Node type: %d\n", node->type());
cleanBuild = false; cleanBuild = false;
node_seen.erase((ASTNode *)node);
return; return;
} }
cleanBuild = true; cleanBuild = true;
node_seen.erase((ASTNode *)node);
} }
bool print_docstring(PycRef<PycObject> obj, int indent, PycModule* mod, bool print_docstring(PycRef<PycObject> obj, int indent, PycModule* mod,
@@ -3456,8 +3473,16 @@ bool print_docstring(PycRef<PycObject> obj, int indent, PycModule* mod,
return false; return false;
} }
static std::unordered_set<PycCode *> code_seen;
void decompyle(PycRef<PycCode> code, PycModule* mod, std::ostream& pyc_output) void decompyle(PycRef<PycCode> code, PycModule* mod, std::ostream& pyc_output)
{ {
if (code_seen.find((PycCode *)code) != code_seen.end()) {
fputs("WARNING: Circular reference detected\n", stderr);
return;
}
code_seen.insert((PycCode *)code);
PycRef<ASTNode> source = BuildFromCode(code, mod); PycRef<ASTNode> source = BuildFromCode(code, mod);
PycRef<ASTNodeList> clean = source.cast<ASTNodeList>(); PycRef<ASTNodeList> clean = source.cast<ASTNodeList>();
@@ -3551,4 +3576,6 @@ void decompyle(PycRef<PycCode> code, PycModule* mod, std::ostream& pyc_output)
start_line(cur_indent, pyc_output); start_line(cur_indent, pyc_output);
pyc_output << "# WARNING: Decompyle incomplete\n"; pyc_output << "# WARNING: Decompyle incomplete\n";
} }
code_seen.erase((PycCode *)code);
} }

View File

@@ -600,3 +600,18 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
pyc_output << "\n"; pyc_output << "\n";
} }
} }
/*
 * Write every exception table entry of `code` to `pyc_output`, one per line.
 *
 * Each line is prefixed with `indent` indentation units and has the form
 *   <start> to <end> -> <target> [<stack depth>] lasti
 * where the trailing "lasti" marker is printed only for entries whose
 * push_lasti flag is set.  A non-positive `indent` produces no prefix.
 */
void bc_exceptiontable(std::ostream& pyc_output, PycRef<PycCode> code,
                       int indent)
{
    const auto rows = code->exceptionTableEntries();
    for (const auto& row : rows) {
        // Emit the per-line indentation prefix.
        for (int left = indent; left > 0; --left)
            pyc_output << " ";

        const char* lasti_tag = row.push_lasti ? "lasti" : "";
        pyc_output << row.start_offset << " to " << row.end_offset
                   << " -> " << row.target << " [" << row.stack_depth
                   << "] " << lasti_tag
                   << "\n";
    }
}

View File

@@ -32,3 +32,5 @@ void print_const(std::ostream& pyc_output, PycRef<PycObject> obj, PycModule* mod
void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos); void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos);
void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod, void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
int indent, unsigned flags); int indent, unsigned flags);
void bc_exceptiontable(std::ostream& pyc_output, PycRef<PycCode> code,
int indent);

View File

@@ -53,35 +53,43 @@ bool PycFile::atEof() const
int PycFile::getByte() int PycFile::getByte()
{ {
int ch = fgetc(m_stream); int ch = fgetc(m_stream);
if (ch == EOF) if (ch == EOF) {
ungetc(ch, m_stream); fputs("PycFile::getByte(): Unexpected end of stream\n", stderr);
std::exit(1);
}
return ch; return ch;
} }
int PycFile::getBuffer(int bytes, void* buffer) void PycFile::getBuffer(int bytes, void* buffer)
{ {
return (int)fread(buffer, 1, bytes, m_stream); if (fread(buffer, 1, bytes, m_stream) != (size_t)bytes) {
fputs("PycFile::getBuffer(): Unexpected end of stream\n", stderr);
std::exit(1);
}
} }
/* PycBuffer */ /* PycBuffer */
int PycBuffer::getByte() int PycBuffer::getByte()
{ {
if (atEof()) if (atEof()) {
return EOF; fputs("PycBuffer::getByte(): Unexpected end of stream\n", stderr);
std::exit(1);
}
int ch = (int)(*(m_buffer + m_pos)); int ch = (int)(*(m_buffer + m_pos));
++m_pos; ++m_pos;
return ch & 0xFF; // Make sure it's just a byte! return ch & 0xFF; // Make sure it's just a byte!
} }
int PycBuffer::getBuffer(int bytes, void* buffer) void PycBuffer::getBuffer(int bytes, void* buffer)
{ {
if (m_pos + bytes > m_size) if (m_pos + bytes > m_size) {
bytes = m_size - m_pos; fputs("PycBuffer::getBuffer(): Unexpected end of stream\n", stderr);
std::exit(1);
}
if (bytes != 0) if (bytes != 0)
memcpy(buffer, (m_buffer + m_pos), bytes); memcpy(buffer, (m_buffer + m_pos), bytes);
m_pos += bytes; m_pos += bytes;
return bytes;
} }
int formatted_print(std::ostream& stream, const char* format, ...) int formatted_print(std::ostream& stream, const char* format, ...)

6
data.h
View File

@@ -19,7 +19,7 @@ public:
virtual bool atEof() const = 0; virtual bool atEof() const = 0;
virtual int getByte() = 0; virtual int getByte() = 0;
virtual int getBuffer(int bytes, void* buffer) = 0; virtual void getBuffer(int bytes, void* buffer) = 0;
int get16(); int get16();
int get32(); int get32();
Pyc_INT64 get64(); Pyc_INT64 get64();
@@ -34,7 +34,7 @@ public:
bool atEof() const override; bool atEof() const override;
int getByte() override; int getByte() override;
int getBuffer(int bytes, void* buffer) override; void getBuffer(int bytes, void* buffer) override;
private: private:
FILE* m_stream; FILE* m_stream;
@@ -50,7 +50,7 @@ public:
bool atEof() const override { return (m_pos == m_size); } bool atEof() const override { return (m_pos == m_size); }
int getByte() override; int getByte() override;
int getBuffer(int bytes, void* buffer) override; void getBuffer(int bytes, void* buffer) override;
private: private:
const unsigned char* m_buffer; const unsigned char* m_buffer;

View File

@@ -128,3 +128,44 @@ PycRef<PycString> PycCode::getCellVar(PycModule* mod, int idx) const
? m_freeVars->get(idx - m_cellVars->size()).cast<PycString>() ? m_freeVars->get(idx - m_cellVars->size()).cast<PycString>()
: m_cellVars->get(idx).cast<PycString>(); : m_cellVars->get(idx).cast<PycString>();
} }
/*
 * Decode one variable-length integer from `data`.
 *
 * Every encoded byte contributes its low six bits (mask 0x3F) to the value,
 * most significant group first; bit 0x40 set on a byte means another byte
 * follows.  `pos` is advanced by one for every byte consumed.
 *
 * Returns the decoded value.  The value is accumulated as an unsigned
 * quantity so that an over-long (malformed) sequence wraps around instead of
 * triggering signed-overflow undefined behaviour in the left shift; for
 * well-formed input the result is identical to plain int arithmetic.
 */
int _parse_varint(PycBuffer& data, int& pos) {
    int b = data.getByte();
    pos += 1;

    unsigned int val = static_cast<unsigned int>(b & 0x3F);
    while (b & 0x40) {
        // Unsigned shift: well defined even when high bits fall off the end.
        val <<= 6;
        b = data.getByte();
        pos += 1;
        val |= static_cast<unsigned int>(b & 0x3F);
    }
    return static_cast<int>(val);
}
std::vector<PycExceptionTableEntry> PycCode::exceptionTableEntries() const
{
PycBuffer data(m_exceptTable->value(), m_exceptTable->length());
std::vector<PycExceptionTableEntry> entries;
int pos = 0;
while (!data.atEof()) {
int start = _parse_varint(data, pos) * 2;
int length = _parse_varint(data, pos) * 2;
int end = start + length;
int target = _parse_varint(data, pos) * 2;
int dl = _parse_varint(data, pos);
int depth = dl >> 1;
bool lasti = bool(dl & 1);
entries.push_back(PycExceptionTableEntry(start, end, target, depth, lasti));
}
return entries;
}

View File

@@ -8,6 +8,18 @@
class PycData; class PycData;
class PycModule; class PycModule;
/*
 * One row of a code object's exception table: the range
 * [start_offset, end_offset) it covers, the target offset associated with
 * that range, and the stack depth / push_lasti flag recorded for it.
 */
class PycExceptionTableEntry {
public:
    int start_offset;   // inclusive
    int end_offset;     // exclusive
    int target;
    int stack_depth;
    bool push_lasti;

    /* Store the five fields verbatim; no validation is performed. */
    PycExceptionTableEntry(int start, int end, int handler, int depth, bool lasti)
        : start_offset(start),
          end_offset(end),
          target(handler),
          stack_depth(depth),
          push_lasti(lasti)
    {
    }
};
class PycCode : public PycObject { class PycCode : public PycObject {
public: public:
typedef std::vector<PycRef<PycString>> globals_t; typedef std::vector<PycRef<PycString>> globals_t;
@@ -87,6 +99,8 @@ public:
m_globalsUsed.emplace_back(std::move(varname)); m_globalsUsed.emplace_back(std::move(varname));
} }
std::vector<PycExceptionTableEntry> exceptionTableEntries() const;
private: private:
int m_argCount, m_posOnlyArgCount, m_kwOnlyArgCount, m_numLocals; int m_argCount, m_posOnlyArgCount, m_kwOnlyArgCount, m_numLocals;
int m_stackSize, m_flags; int m_stackSize, m_flags;

View File

@@ -4,6 +4,7 @@
#include <string> #include <string>
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <unordered_set>
#include "pyc_module.h" #include "pyc_module.h"
#include "pyc_numeric.h" #include "pyc_numeric.h"
#include "bytecode.h" #include "bytecode.h"
@@ -73,6 +74,8 @@ static void iprintf(std::ostream& pyc_output, int indent, const char* fmt, ...)
va_end(varargs); va_end(varargs);
} }
static std::unordered_set<PycObject *> out_seen;
void output_object(PycRef<PycObject> obj, PycModule* mod, int indent, void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
unsigned flags, std::ostream& pyc_output) unsigned flags, std::ostream& pyc_output)
{ {
@@ -81,6 +84,12 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
return; return;
} }
if (out_seen.find((PycObject *)obj) != out_seen.end()) {
fputs("WARNING: Circular reference detected\n", stderr);
return;
}
out_seen.insert((PycObject *)obj);
switch (obj->type()) { switch (obj->type()) {
case PycObject::TYPE_CODE: case PycObject::TYPE_CODE:
case PycObject::TYPE_CODE2: case PycObject::TYPE_CODE2:
@@ -145,16 +154,16 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
iputs(pyc_output, indent + 1, "[Disassembly]\n"); iputs(pyc_output, indent + 1, "[Disassembly]\n");
bc_disasm(pyc_output, codeObj, mod, indent + 2, flags); bc_disasm(pyc_output, codeObj, mod, indent + 2, flags);
if (mod->verCompare(3, 11) >= 0) {
iputs(pyc_output, indent + 1, "[Exception Table]\n");
bc_exceptiontable(pyc_output, codeObj, indent+2);
}
if (mod->verCompare(1, 5) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) { if (mod->verCompare(1, 5) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) {
iprintf(pyc_output, indent + 1, "First Line: %d\n", codeObj->firstLine()); iprintf(pyc_output, indent + 1, "First Line: %d\n", codeObj->firstLine());
iputs(pyc_output, indent + 1, "[Line Number Table]\n"); iputs(pyc_output, indent + 1, "[Line Number Table]\n");
output_object(codeObj->lnTable().cast<PycObject>(), mod, indent + 2, flags, pyc_output); output_object(codeObj->lnTable().cast<PycObject>(), mod, indent + 2, flags, pyc_output);
} }
if (mod->verCompare(3, 11) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) {
iputs(pyc_output, indent + 1, "[Exception Table]\n");
output_object(codeObj->exceptTable().cast<PycObject>(), mod, indent + 2, flags, pyc_output);
}
} }
break; break;
case PycObject::TYPE_STRING: case PycObject::TYPE_STRING:
@@ -246,6 +255,8 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
default: default:
iprintf(pyc_output, indent, "<TYPE: %d>\n", obj->type()); iprintf(pyc_output, indent, "<TYPE: %d>\n", obj->type());
} }
out_seen.erase((PycObject *)obj);
} }
int main(int argc, char* argv[]) int main(int argc, char* argv[])