From dc50479d06d3d75c596d42057b734b58e373a51b Mon Sep 17 00:00:00 2001 From: clubby789 Date: Wed, 15 Jun 2022 11:33:11 +0100 Subject: [PATCH] Add support for pycdas/pycdc using marshalled code objects These require the Python version to be specified as the Pyc header magic is not present. --- pyc_module.cpp | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ pyc_module.h | 5 ++++ pycdas.cpp | 44 ++++++++++++++++++++++++++++------ pycdc.cpp | 44 ++++++++++++++++++++++++++++------ 4 files changed, 144 insertions(+), 14 deletions(-) diff --git a/pyc_module.cpp b/pyc_module.cpp index d9d52e5..152c0df 100644 --- a/pyc_module.cpp +++ b/pyc_module.cpp @@ -202,6 +202,23 @@ void PycModule::loadFromFile(const char* filename) m_code = LoadObject(&in, this).require_cast(); } +void PycModule::loadFromMarshalledFile(const char* filename, int major, int minor) { + PycFile in (filename); + if (!in.isOpen()) { + fprintf(stderr, "Error opening file %s\n", filename); + return; + } + PycMagic magic = version_to_magic(major, minor); + if (magic == PycMagic::INVALID) { + fprintf(stderr, "Unsupported version\n"); + return; + } + setVersion(magic); + m_maj = major; + m_min = minor; + m_code = LoadObject(&in, this).require_cast(); +} + PycRef PycModule::getIntern(int ref) const { if (ref < 0) @@ -227,3 +244,51 @@ PycRef PycModule::getRef(int ref) const throw std::out_of_range("Ref index out of range"); return *it; } + +#define enumVariant(mj, mn) case mn: \ + return PycMagic::MAGIC_ ## mj ## _ ## mn + +PycMagic version_to_magic(int major, int minor) { + switch (major) { + case 1: + switch (minor) { + enumVariant(1, 0); + case 1: + case 2: + return PycMagic::MAGIC_1_1; + enumVariant(1, 3); + enumVariant(1, 4); + enumVariant(1, 5); + enumVariant(1, 6); + default: return PycMagic::INVALID; + } + case 2: + switch (minor) { + enumVariant(2, 0); + enumVariant(2, 1); + enumVariant(2, 3); + enumVariant(2, 4); + enumVariant(2, 5); + enumVariant(2, 6); + enumVariant(2, 7); + default: return 
PycMagic::INVALID; + } + case 3: + switch (minor) { + enumVariant(3, 0); + enumVariant(3, 1); + enumVariant(3, 3); + enumVariant(3, 4); + enumVariant(3, 5); + enumVariant(3, 6); + enumVariant(3, 7); + enumVariant(3, 8); + enumVariant(3, 9); + enumVariant(3, 10); + default: return PycMagic::INVALID; + } + default: + return PycMagic::INVALID; + } + } +#undef enumVariant \ No newline at end of file diff --git a/pyc_module.h b/pyc_module.h index 05046e7..22830f5 100644 --- a/pyc_module.h +++ b/pyc_module.h @@ -33,13 +33,18 @@ enum PycMagic { MAGIC_3_8 = 0x0A0D0D55, MAGIC_3_9 = 0x0A0D0D61, MAGIC_3_10 = 0x0A0D0D6F, + + INVALID = 0, }; +PycMagic version_to_magic(int major, int minor); + class PycModule { public: PycModule() : m_maj(-1), m_min(-1), m_unicode(false) { } void loadFromFile(const char* filename); + void loadFromMarshalledFile(const char *filename, int major, int minor); bool isValid() const { return (m_maj >= 0) && (m_min >= 0); } int majorVer() const { return m_maj; } diff --git a/pycdas.cpp b/pycdas.cpp index 1bf4288..3f93104 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "pyc_module.h" #include "pyc_numeric.h" #include "bytecode.h" @@ -232,6 +233,8 @@ void output_object(PycRef obj, PycModule* mod, int indent) int main(int argc, char* argv[]) { const char* infile = nullptr; + bool marshalled = false; + const char* version = nullptr; for (int arg = 1; arg < argc; ++arg) { if (strcmp(argv[arg], "-o") == 0) { if (arg + 1 < argc) { @@ -247,11 +250,22 @@ int main(int argc, char* argv[]) fputs("Option '-o' requires a filename\n", stderr); return 1; } + } else if (strcmp(argv[arg], "-c") == 0) { + marshalled = true; + } else if (strcmp(argv[arg], "-v") == 0) { + if (arg + 1 < argc) { + version = argv[++arg]; + } else { + fputs("Option '-v' requires a version\n", stderr); + return 1; + } } else if (strcmp(argv[arg], "--help") == 0 || strcmp(argv[arg], "-h") == 0) { fprintf(stderr, "Usage: %s [options] 
input.pyc\n\n", argv[0]); fputs("Options:\n", stderr); - fputs(" -o Write output to (default: stdout)\n", stderr); - fputs(" --help Show this help text and then exit\n", stderr); + fputs(" -o Write output to (default: stdout)\n", stderr); + fputs(" -c Specify loading a compiled code object. Requires the version to be set\n", stderr); + fputs(" -v Specify a Python version\n", stderr); + fputs(" --help Show this help text and then exit\n", stderr); return 0; } else { infile = argv[arg]; @@ -264,11 +278,27 @@ int main(int argc, char* argv[]) } PycModule mod; - try { - mod.loadFromFile(infile); - } catch (std::exception& ex) { - fprintf(stderr, "Error disassembling %s: %s\n", infile, ex.what()); - return 1; + if (!marshalled) { + try { + mod.loadFromFile(infile); + } catch (std::exception &ex) { + fprintf(stderr, "Error disassembling %s: %s\n", infile, ex.what()); + return 1; + } + } else { + if (!version) { + fputs("Opening raw code objects requires a version to be specified\n", stderr); + return 1; + } + std::string s(version); + auto dot = s.find('.'); + if (dot == std::string::npos || dot == s.size()-1) { + fputs("Unable to read version string (use the format x.y)\n", stderr); + return 1; + } + int major = std::stoi(s.substr(0, dot)); + int minor = std::stoi(s.substr(dot+1, s.size())); + mod.loadFromMarshalledFile(infile, major, minor); } const char* dispname = strrchr(infile, PATHSEP); dispname = (dispname == NULL) ? 
infile : dispname + 1; diff --git a/pycdc.cpp b/pycdc.cpp index 97a0eae..103ee4f 100644 --- a/pycdc.cpp +++ b/pycdc.cpp @@ -10,6 +10,8 @@ int main(int argc, char* argv[]) { const char* infile = nullptr; + bool marshalled = false; + const char* version = nullptr; for (int arg = 1; arg < argc; ++arg) { if (strcmp(argv[arg], "-o") == 0) { if (arg + 1 < argc) { @@ -25,11 +27,22 @@ int main(int argc, char* argv[]) fputs("Option '-o' requires a filename\n", stderr); return 1; } + } else if (strcmp(argv[arg], "-c") == 0) { + marshalled = true; + } else if (strcmp(argv[arg], "-v") == 0) { + if (arg + 1 < argc) { + version = argv[++arg]; + } else { + fputs("Option '-v' requires a version\n", stderr); + return 1; + } } else if (strcmp(argv[arg], "--help") == 0 || strcmp(argv[arg], "-h") == 0) { fprintf(stderr, "Usage: %s [options] input.pyc\n\n", argv[0]); fputs("Options:\n", stderr); - fputs(" -o Write output to (default: stdout)\n", stderr); - fputs(" --help Show this help text and then exit\n", stderr); + fputs(" -o Write output to (default: stdout)\n", stderr); + fputs(" -c Specify loading a compiled code object. 
Requires the version to be set\n", stderr); + fputs(" -v Specify a Python version\n", stderr); + fputs(" --help Show this help text and then exit\n", stderr); return 0; } else { infile = argv[arg]; @@ -42,12 +55,29 @@ int main(int argc, char* argv[]) } PycModule mod; - try { - mod.loadFromFile(infile); - } catch (std::exception& ex) { - fprintf(stderr, "Error loading file %s: %s\n", infile, ex.what()); - return 1; + if (!marshalled) { + try { + mod.loadFromFile(infile); + } catch (std::exception& ex) { + fprintf(stderr, "Error loading file %s: %s\n", infile, ex.what()); + return 1; + } + } else { + if (!version) { + fputs("Opening raw code objects requires a version to be specified\n", stderr); + return 1; + } + std::string s(version); + auto dot = s.find('.'); + if (dot == std::string::npos || dot == s.size()-1) { + fputs("Unable to read version string (use the format x.y)\n", stderr); + return 1; + } + int major = std::stoi(s.substr(0, dot)); + int minor = std::stoi(s.substr(dot+1, s.size())); + mod.loadFromMarshalledFile(infile, major, minor); } + if (!mod.isValid()) { fprintf(stderr, "Could not load file %s\n", infile); return 1;