diff --git a/ASTree.cpp b/ASTree.cpp index 7cc746d..f86b9c0 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -2510,6 +2510,12 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) case Pyc::SETUP_ANNOTATIONS: variable_annotations = true; break; + case Pyc::CACHE: + /* These "fake" opcodes are used as placeholders for optimizing + certain opcodes in Python 3.11+. Since we have no need for + that during disassembly/decompilation, we can just treat these + as no-ops. */ + break; default: fprintf(stderr, "Unsupported opcode: %s\n", Pyc::OpcodeName(opcode & 0xFF)); cleanBuild = false; diff --git a/bytecode.cpp b/bytecode.cpp index c00fe05..607ddf7 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -345,7 +345,7 @@ void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& } } -void bc_disasm(PycRef code, PycModule* mod, int indent) +void bc_disasm(PycRef code, PycModule* mod, int indent, unsigned flags) { static const char *cmp_strings[] = { "<", "<=", "==", "!=", ">", ">=", "in", "not in", "is", "is not", @@ -358,12 +358,14 @@ void bc_disasm(PycRef code, PycModule* mod, int indent) int opcode, operand; int pos = 0; while (!source.atEof()) { + int start_pos = pos; + bc_next(source, mod, opcode, operand, pos); + if (opcode == Pyc::CACHE && (flags & Pyc::DISASM_SHOW_CACHES) == 0) + continue; + for (int i=0; i= Pyc::PYC_HAVE_ARG) { if (Pyc::IsConstArg(opcode)) { diff --git a/bytecode.h b/bytecode.h index a9dc150..06313bc 100644 --- a/bytecode.h +++ b/bytecode.h @@ -17,6 +17,11 @@ enum Opcode { PYC_INVALID_OPCODE = -1, }; +enum DisassemblyFlags { + DISASM_PYCODE_VERBOSE = 0x1, + DISASM_SHOW_CACHES = 0x2, +}; + const char* OpcodeName(int opcode); int ByteToOpcode(int maj, int min, int opcode); @@ -32,4 +37,4 @@ bool IsCompareArg(int opcode); void print_const(PycRef obj, PycModule* mod, const char* parent_f_string_quote = nullptr); void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos); -void bc_disasm(PycRef code, PycModule* mod, int 
indent); +void bc_disasm(PycRef code, PycModule* mod, int indent, unsigned flags); diff --git a/pycdas.cpp b/pycdas.cpp index 5e32b71..2be7a4f 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -12,9 +12,6 @@ # define PATHSEP '/' #endif -// Set this to 1 to print extra details on PyCode objects -#define PRINT_EXTRA_PYCODE_FIELDS 0 - static const char* flag_names[] = { "CO_OPTIMIZED", "CO_NEWLOCALS", "CO_VARARGS", "CO_VARKEYWORDS", "CO_NESTED", "CO_GENERATOR", "CO_NOFREE", "CO_COROUTINE", @@ -74,7 +71,8 @@ static void iprintf(int indent, const char* fmt, ...) va_end(varargs); } -void output_object(PycRef obj, PycModule* mod, int indent) +void output_object(PycRef obj, PycModule* mod, int indent, + unsigned flags) { if (obj == NULL) { iputs(indent, ""); @@ -107,7 +105,7 @@ void output_object(PycRef obj, PycModule* mod, int indent) iputs(indent + 1, "[Names]\n"); for (int i=0; inames()->size(); i++) - output_object(codeObj->names()->get(i), mod, indent + 2); + output_object(codeObj->names()->get(i), mod, indent + 2, flags); if (mod->verCompare(1, 3) >= 0 && mod->verCompare(3, 11) < 0) { if (mod->verCompare(3, 11) >= 0) @@ -115,44 +113,40 @@ void output_object(PycRef obj, PycModule* mod, int indent) else iputs(indent + 1, "[Var Names]\n"); for (int i=0; ilocalNames()->size(); i++) - output_object(codeObj->localNames()->get(i), mod, indent + 2); + output_object(codeObj->localNames()->get(i), mod, indent + 2, flags); } -#if PRINT_EXTRA_PYCODE_FIELDS - if (mod->verCompare(3, 11) >= 0) { + if (mod->verCompare(3, 11) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) { iputs(indent + 1, "[Locals+Kinds]\n"); - output_object(codeObj->localKinds().cast(), mod, indent + 2); + output_object(codeObj->localKinds().cast(), mod, indent + 2, flags); } -#endif if (mod->verCompare(2, 1) >= 0 && mod->verCompare(3, 11) < 0) { iputs(indent + 1, "[Free Vars]\n"); for (int i=0; ifreeVars()->size(); i++) - output_object(codeObj->freeVars()->get(i), mod, indent + 2); + 
output_object(codeObj->freeVars()->get(i), mod, indent + 2, flags); iputs(indent + 1, "[Cell Vars]\n"); for (int i=0; icellVars()->size(); i++) - output_object(codeObj->cellVars()->get(i), mod, indent + 2); + output_object(codeObj->cellVars()->get(i), mod, indent + 2, flags); } iputs(indent + 1, "[Constants]\n"); for (int i=0; iconsts()->size(); i++) - output_object(codeObj->consts()->get(i), mod, indent + 2); + output_object(codeObj->consts()->get(i), mod, indent + 2, flags); iputs(indent + 1, "[Disassembly]\n"); - bc_disasm(codeObj, mod, indent + 2); + bc_disasm(codeObj, mod, indent + 2, flags); -#if PRINT_EXTRA_PYCODE_FIELDS - if (mod->verCompare(1, 5) >= 0) { + if (mod->verCompare(1, 5) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) { iputs(indent + 1, "[Line Number Table]\n"); - output_object(codeObj->lnTable().cast(), mod, indent + 2); + output_object(codeObj->lnTable().cast(), mod, indent + 2, flags); } - if (mod->verCompare(3, 11) >= 0) { + if (mod->verCompare(3, 11) >= 0 && (flags & Pyc::DISASM_PYCODE_VERBOSE) != 0) { iputs(indent + 1, "[Exception Table]\n"); - output_object(codeObj->exceptTable().cast(), mod, indent + 2); + output_object(codeObj->exceptTable().cast(), mod, indent + 2, flags); } -#endif } break; case PycObject::TYPE_STRING: @@ -182,7 +176,7 @@ void output_object(PycRef obj, PycModule* mod, int indent) { iputs(indent, "(\n"); for (const auto& val : obj.cast()->values()) - output_object(val, mod, indent + 1); + output_object(val, mod, indent + 1, flags); iputs(indent, ")\n"); } break; @@ -190,7 +184,7 @@ void output_object(PycRef obj, PycModule* mod, int indent) { iputs(indent, "[\n"); for (const auto& val : obj.cast()->values()) - output_object(val, mod, indent + 1); + output_object(val, mod, indent + 1, flags); iputs(indent, "]\n"); } break; @@ -202,8 +196,8 @@ void output_object(PycRef obj, PycModule* mod, int indent) PycDict::key_t::const_iterator ki = keys.begin(); PycDict::value_t::const_iterator vi = values.begin(); while (ki != 
keys.end()) { - output_object(*ki, mod, indent + 1); - output_object(*vi, mod, indent + 2); + output_object(*ki, mod, indent + 1, flags); + output_object(*vi, mod, indent + 2, flags); ++ki, ++vi; } iputs(indent, "}\n"); @@ -213,7 +207,7 @@ void output_object(PycRef obj, PycModule* mod, int indent) { iputs(indent, "{\n"); for (const auto& val : obj.cast()->values()) - output_object(val, mod, indent + 1); + output_object(val, mod, indent + 1, flags); iputs(indent, "}\n"); } break; @@ -259,6 +253,8 @@ int main(int argc, char* argv[]) const char* infile = nullptr; bool marshalled = false; const char* version = nullptr; + unsigned disasm_flags = 0; + for (int arg = 1; arg < argc; ++arg) { if (strcmp(argv[arg], "-o") == 0) { if (arg + 1 < argc) { @@ -283,14 +279,23 @@ int main(int argc, char* argv[]) fputs("Option '-v' requires a version\n", stderr); return 1; } + } else if (strcmp(argv[arg], "--pycode-extra") == 0) { + disasm_flags |= Pyc::DISASM_PYCODE_VERBOSE; + } else if (strcmp(argv[arg], "--show-caches") == 0) { + disasm_flags |= Pyc::DISASM_SHOW_CACHES; } else if (strcmp(argv[arg], "--help") == 0 || strcmp(argv[arg], "-h") == 0) { fprintf(stderr, "Usage: %s [options] input.pyc\n\n", argv[0]); fputs("Options:\n", stderr); fputs(" -o Write output to (default: stdout)\n", stderr); fputs(" -c Specify loading a compiled code object. 
Requires the version to be set\n", stderr); fputs(" -v Specify a Python version for loading a compiled code object\n", stderr); + fputs(" --pycode-extra Show extra fields in PyCode object dumps\n", stderr); + fputs(" --show-caches Don't suppress CACHE instructions in Python 3.11+ disassembly\n", stderr); fputs(" --help Show this help text and then exit\n", stderr); return 0; + } else if (argv[arg][0] == '-') { + fprintf(stderr, "Error: Unrecognized argument %s\n", argv[arg]); + return 1; } else { infile = argv[arg]; } @@ -329,7 +334,7 @@ int main(int argc, char* argv[]) fprintf(pyc_output, "%s (Python %d.%d%s)\n", dispname, mod.majorVer(), mod.minorVer(), (mod.majorVer() < 3 && mod.isUnicode()) ? " -U" : ""); try { - output_object(mod.code().try_cast(), &mod, 0); + output_object(mod.code().try_cast(), &mod, 0, disasm_flags); } catch (std::exception& ex) { fprintf(stderr, "Error disassembling %s: %s\n", infile, ex.what()); return 1;