Merge branch 'upstream-pycdc'

This commit is contained in:
2025-09-12 17:39:49 +08:00
8 changed files with 144 additions and 26 deletions

View File

@@ -1,6 +1,7 @@
#include <cstring>
#include <cstdint>
#include <stdexcept>
#include <unordered_set>
#include "ASTree.h"
#include "FastStack.h"
#include "pyc_numeric.h"
@@ -1483,8 +1484,12 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
break;
}
stack = stack_hist.top();
stack_hist.pop();
if (!stack_hist.empty()) {
stack = stack_hist.top();
stack_hist.pop();
} else {
fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n");
}
PycRef<ASTBlock> prev = curblock;
PycRef<ASTBlock> nil;
@@ -1655,10 +1660,10 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
} while (prev != nil);
curblock = blocks.top();
if (curblock->blktype() == ASTBlock::BLK_EXCEPT) {
curblock->setEnd(pos+offs);
if (!blocks.empty()) {
curblock = blocks.top();
if (curblock->blktype() == ASTBlock::BLK_EXCEPT)
curblock->setEnd(pos+offs);
}
}
break;
@@ -2076,7 +2081,8 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
else
curblock->append(new ASTPrint(stack.top(), stream));
stack.pop();
stream->setProcessed();
if (stream)
stream->setProcessed();
}
break;
case Pyc::PRINT_NEWLINE:
@@ -2104,7 +2110,8 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
else
curblock->append(new ASTPrint(nullptr, stream));
stack.pop();
stream->setProcessed();
if (stream)
stream->setProcessed();
}
break;
case Pyc::RAISE_VARARGS_A:
@@ -3134,6 +3141,8 @@ void print_formatted_value(PycRef<ASTFormattedValue> formatted_value, PycModule*
pyc_output << "}";
}
static std::unordered_set<ASTNode *> node_seen;
void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
{
if (node == NULL) {
@@ -3142,6 +3151,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
return;
}
if (node_seen.find((ASTNode *)node) != node_seen.end()) {
fputs("WARNING: Circular reference detected\n", stderr);
return;
}
node_seen.insert((ASTNode *)node);
switch (node->type()) {
case ASTNode::NODE_BINARY:
case ASTNode::NODE_COMPARE:
@@ -3803,10 +3818,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
pyc_output << "<NODE:" << node->type() << ">";
fprintf(stderr, "Unsupported Node type: %d\n", node->type());
cleanBuild = false;
node_seen.erase((ASTNode *)node);
return;
}
cleanBuild = true;
node_seen.erase((ASTNode *)node);
}
bool print_docstring(PycRef<PycObject> obj, int indent, PycModule* mod,
@@ -3823,8 +3840,16 @@ bool print_docstring(PycRef<PycObject> obj, int indent, PycModule* mod,
return false;
}
static std::unordered_set<PycCode *> code_seen;
void decompyle(PycRef<PycCode> code, PycModule* mod, std::ostream& pyc_output)
{
if (code_seen.find((PycCode *)code) != code_seen.end()) {
fputs("WARNING: Circular reference detected\n", stderr);
return;
}
code_seen.insert((PycCode *)code);
PycRef<ASTNode> source = BuildFromCode(code, mod);
PycRef<ASTNodeList> clean = source.cast<ASTNodeList>();
@@ -3918,4 +3943,6 @@ void decompyle(PycRef<PycCode> code, PycModule* mod, std::ostream& pyc_output)
start_line(cur_indent, pyc_output);
pyc_output << "# WARNING: Decompyle incomplete\n";
}
code_seen.erase((PycCode *)code);
}

View File

@@ -602,3 +602,18 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
pyc_output << "\n";
}
}
/* Write the exception table of `code` to `pyc_output`, one entry per line.
 *
 * Every line is prefixed with `indent` copies of the indent string and has
 * the shape "<start> to <end> -> <target> [<depth>] " followed by "lasti"
 * when the entry's push_lasti flag is set.
 */
void bc_exceptiontable(std::ostream& pyc_output, PycRef<PycCode> code,
                       int indent)
{
    const auto handlers = code->exceptionTableEntries();
    for (const auto& handler : handlers) {
        // Emit the indentation prefix first.
        for (int remaining = indent; remaining > 0; --remaining)
            pyc_output << " ";

        // Only flagged entries get the "lasti" marker; others get nothing.
        const char* lasti_tag = "";
        if (handler.push_lasti)
            lasti_tag = "lasti";

        pyc_output << handler.start_offset << " to " << handler.end_offset;
        pyc_output << " -> " << handler.target;
        pyc_output << " [" << handler.stack_depth << "] " << lasti_tag;
        pyc_output << "\n";
    }
}

View File

@@ -32,3 +32,5 @@ void print_const(std::ostream& pyc_output, PycRef<PycObject> obj, PycModule* mod
void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos);
void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
int indent, unsigned flags);
void bc_exceptiontable(std::ostream& pyc_output, PycRef<PycCode> code,
int indent);

View File

@@ -53,35 +53,43 @@ bool PycFile::atEof() const
int PycFile::getByte()
{
int ch = fgetc(m_stream);
if (ch == EOF)
ungetc(ch, m_stream);
if (ch == EOF) {
fputs("PycFile::getByte(): Unexpected end of stream\n", stderr);
std::exit(1);
}
return ch;
}
int PycFile::getBuffer(int bytes, void* buffer)
void PycFile::getBuffer(int bytes, void* buffer)
{
return (int)fread(buffer, 1, bytes, m_stream);
if (fread(buffer, 1, bytes, m_stream) != (size_t)bytes) {
fputs("PycFile::getBuffer(): Unexpected end of stream\n", stderr);
std::exit(1);
}
}
/* PycBuffer */
int PycBuffer::getByte()
{
if (atEof())
return EOF;
if (atEof()) {
fputs("PycBuffer::getByte(): Unexpected end of stream\n", stderr);
std::exit(1);
}
int ch = (int)(*(m_buffer + m_pos));
++m_pos;
return ch & 0xFF; // Make sure it's just a byte!
}
int PycBuffer::getBuffer(int bytes, void* buffer)
void PycBuffer::getBuffer(int bytes, void* buffer)
{
if (m_pos + bytes > m_size)
bytes = m_size - m_pos;
if (m_pos + bytes > m_size) {
fputs("PycBuffer::getBuffer(): Unexpected end of stream\n", stderr);
std::exit(1);
}
if (bytes != 0)
memcpy(buffer, (m_buffer + m_pos), bytes);
m_pos += bytes;
return bytes;
}
int formatted_print(std::ostream& stream, const char* format, ...)

6
data.h
View File

@@ -19,7 +19,7 @@ public:
virtual bool atEof() const = 0;
virtual int getByte() = 0;
virtual int getBuffer(int bytes, void* buffer) = 0;
virtual void getBuffer(int bytes, void* buffer) = 0;
int get16();
int get32();
Pyc_INT64 get64();
@@ -34,7 +34,7 @@ public:
bool atEof() const override;
int getByte() override;
int getBuffer(int bytes, void* buffer) override;
void getBuffer(int bytes, void* buffer) override;
private:
FILE* m_stream;
@@ -50,7 +50,7 @@ public:
bool atEof() const override { return (m_pos == m_size); }
int getByte() override;
int getBuffer(int bytes, void* buffer) override;
void getBuffer(int bytes, void* buffer) override;
private:
const unsigned char* m_buffer;

View File

@@ -239,3 +239,44 @@ PycRef<PycString> PycCode::getCellVar(PycModule* mod, int idx) const
? m_freeVars->get(idx - m_cellVars->size()).cast<PycString>()
: m_cellVars->get(idx).cast<PycString>();
}
/* Decode one variable-length integer from `data`.
 *
 * Each byte contributes its low 6 bits (mask 0x3F) to the result, most
 * significant group first; bit 0x40 set on a byte means another byte
 * follows.
 *
 * data: buffer to read bytes from (one getByte() call per byte consumed).
 * pos:  running byte counter; incremented by one for every byte read.
 *
 * Returns the decoded value.
 */
int _parse_varint(PycBuffer& data, int& pos)
{
    int b = data.getByte();
    pos += 1;

    // Accumulate in an unsigned value: an over-long (malformed) varint would
    // otherwise shift bits out of a signed int, which is undefined behaviour.
    // Unsigned shifts simply wrap, so bad input cannot trigger UB here.
    unsigned int val = static_cast<unsigned int>(b & 0x3F);
    while (b & 0x40) {
        val <<= 6;
        b = data.getByte();
        pos += 1;
        val |= static_cast<unsigned int>(b & 0x3F);
    }
    return static_cast<int>(val);
}
std::vector<PycExceptionTableEntry> PycCode::exceptionTableEntries() const
{
PycBuffer data(m_exceptTable->value(), m_exceptTable->length());
std::vector<PycExceptionTableEntry> entries;
int pos = 0;
while (!data.atEof()) {
int start = _parse_varint(data, pos) * 2;
int length = _parse_varint(data, pos) * 2;
int end = start + length;
int target = _parse_varint(data, pos) * 2;
int dl = _parse_varint(data, pos);
int depth = dl >> 1;
bool lasti = bool(dl & 1);
entries.push_back(PycExceptionTableEntry(start, end, target, depth, lasti));
}
return entries;
}

View File

@@ -8,6 +8,18 @@
class PycData;
class PycModule;
/* One decoded row of a code object's exception table. */
class PycExceptionTableEntry {
public:
    PycExceptionTableEntry(int start, int end, int handler_target,
                           int depth, bool lasti)
        : start_offset(start),
          end_offset(end),
          target(handler_target),
          stack_depth(depth),
          push_lasti(lasti)
    {
    }

    int start_offset;   // first covered offset (inclusive)
    int end_offset;     // end of the covered range (exclusive)
    int target;         // target offset associated with this range
    int stack_depth;    // stack depth recorded for this entry
    bool push_lasti;    // "lasti" flag recorded for this entry
};
struct PyarmorCoDescriptor
{
unsigned char flags;
@@ -101,6 +113,8 @@ public:
m_globalsUsed.emplace_back(std::move(varname));
}
std::vector<PycExceptionTableEntry> exceptionTableEntries() const;
private:
int m_argCount, m_posOnlyArgCount, m_kwOnlyArgCount, m_numLocals;
int m_stackSize, m_flags;

View File

@@ -4,6 +4,7 @@
#include <string>
#include <iostream>
#include <fstream>
#include <unordered_set>
#include "pyc_module.h"
#include "pyc_numeric.h"
#include "bytecode.h"
@@ -73,6 +74,8 @@ static void iprintf(std::ostream& pyc_output, int indent, const char* fmt, ...)
va_end(varargs);
}
static std::unordered_set<PycObject *> out_seen;
void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
unsigned flags, std::ostream& pyc_output)
{
@@ -81,6 +84,12 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
return;
}
if (out_seen.find((PycObject *)obj) != out_seen.end()) {
fputs("WARNING: Circular reference detected\n", stderr);
return;
}
out_seen.insert((PycObject *)obj);
switch (obj->type()) {
case PycObject::TYPE_CODE:
case PycObject::TYPE_CODE2:
@@ -145,16 +154,16 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
iputs(pyc_output, indent + 1, "[Disassembly]\n");
bc_disasm(pyc_output, codeObj, mod, indent + 2, flags);
if (mod->verCompare(3, 11) >= 0) {
iputs(pyc_output, indent + 1, "[Exception Table]\n");
bc_exceptiontable(pyc_output, codeObj, indent+2);
}
if (mod->verCompare(1, 5) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) {
iprintf(pyc_output, indent + 1, "First Line: %d\n", codeObj->firstLine());
iputs(pyc_output, indent + 1, "[Line Number Table]\n");
output_object(codeObj->lnTable().cast<PycObject>(), mod, indent + 2, flags, pyc_output);
}
if (mod->verCompare(3, 11) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) {
iputs(pyc_output, indent + 1, "[Exception Table]\n");
output_object(codeObj->exceptTable().cast<PycObject>(), mod, indent + 2, flags, pyc_output);
}
}
break;
case PycObject::TYPE_STRING:
@@ -246,6 +255,8 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
default:
iprintf(pyc_output, indent, "<TYPE: %d>\n", obj->type());
}
out_seen.erase((PycObject *)obj);
}
int main(int argc, char* argv[])