From 753d42d94acdef4e173c61971dea924b5cc81677 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Fri, 9 Jun 2023 09:09:03 -0700 Subject: [PATCH] Simplify string object printing to reduce code duplication --- ASTree.cpp | 28 ++++---------------- bytecode.cpp | 12 +-------- pyc_string.cpp | 72 ++++++++++++++++++++++++++++++-------------------- pyc_string.h | 6 ++--- pycdas.cpp | 13 +-------- 5 files changed, 54 insertions(+), 77 deletions(-) diff --git a/ASTree.cpp b/ASTree.cpp index e7c9262..4245a69 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -3224,32 +3224,14 @@ bool print_docstring(PycRef obj, int indent, PycModule* mod, std::ostream& pyc_output) { // docstrings are translated from the bytecode __doc__ = 'string' to simply '''string''' - signed char prefix = -1; - switch (obj.type()) { - case PycObject::TYPE_STRING: - prefix = mod->strIsUnicode() ? 'b' : 0; - break; - case PycObject::TYPE_UNICODE: - prefix = mod->strIsUnicode() ? 0 : 'u'; - break; - case PycObject::TYPE_INTERNED: - case PycObject::TYPE_ASCII: - case PycObject::TYPE_ASCII_INTERNED: - case PycObject::TYPE_SHORT_ASCII: - case PycObject::TYPE_SHORT_ASCII_INTERNED: - if (mod->majorVer() >= 3) - prefix = 0; - else - prefix = mod->strIsUnicode() ? 'b' : 0; - break; - } - if (prefix != -1) { + auto doc = obj.try_cast(); + if (doc != nullptr) { start_line(indent, pyc_output); - OutputString(pyc_output, obj.cast(), prefix, true); + doc->print(pyc_output, mod, true); pyc_output << "\n"; return true; - } else - return false; + } + return false; } void decompyle(PycRef code, PycModule* mod, std::ostream& pyc_output) diff --git a/bytecode.cpp b/bytecode.cpp index b3bb136..cad4ab0 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -170,23 +170,13 @@ void print_const(std::ostream& pyc_output, PycRef obj, PycModule* mod switch (obj->type()) { case PycObject::TYPE_STRING: - OutputString(pyc_output, obj.cast(), mod->strIsUnicode() ? 'b' : 0, - false, parent_f_string_quote); - break; case PycObject::TYPE_UNICODE: - OutputString(pyc_output, obj.cast(), mod->strIsUnicode() ? 0 : 'u', - false, parent_f_string_quote); - break; case PycObject::TYPE_INTERNED: case PycObject::TYPE_ASCII: case PycObject::TYPE_ASCII_INTERNED: case PycObject::TYPE_SHORT_ASCII: case PycObject::TYPE_SHORT_ASCII_INTERNED: - if (mod->majorVer() >= 3) - OutputString(pyc_output, obj.cast(), 0, false, parent_f_string_quote); - else - OutputString(pyc_output, obj.cast(), mod->strIsUnicode() ? 'b' : 0, - false, parent_f_string_quote); + obj.cast()->print(pyc_output, mod, false, parent_f_string_quote); break; case PycObject::TYPE_TUPLE: case PycObject::TYPE_SMALL_TUPLE: diff --git a/pyc_string.cpp b/pyc_string.cpp index 4b18b49..e43e190 100644 --- a/pyc_string.cpp +++ b/pyc_string.cpp @@ -56,15 +56,35 @@ bool PycString::isEqual(PycRef obj) const return isEqual(strObj->m_value); } -void OutputString(std::ostream &pyc_output, PycRef str, char prefix, - bool triple, const char* parent_f_string_quote) +void PycString::print(std::ostream &pyc_output, PycModule* mod, bool triple, + const char* parent_f_string_quote) { + char prefix = 0; + switch (type()) { + case TYPE_STRING: + prefix = mod->strIsUnicode() ? 'b' : 0; + break; + case PycObject::TYPE_UNICODE: + prefix = mod->strIsUnicode() ? 0 : 'u'; + break; + case PycObject::TYPE_INTERNED: + case PycObject::TYPE_ASCII: + case PycObject::TYPE_ASCII_INTERNED: + case PycObject::TYPE_SHORT_ASCII: + case PycObject::TYPE_SHORT_ASCII_INTERNED: + if (mod->majorVer() >= 3) + prefix = 0; + else + prefix = mod->strIsUnicode() ? 'b' : 0; + break; + default: + throw std::runtime_error("Invalid string type"); + } + if (prefix != 0) pyc_output << prefix; - const char* ch = str->value(); - int len = str->length(); - if (len == 0) { + if (m_value.empty()) { pyc_output << "''"; return; } @@ -72,20 +92,17 @@ void OutputString(std::ostream &pyc_output, PycRef str, char prefix, // Determine preferred quote style (Emulate Python's method) bool useQuotes = false; if (!parent_f_string_quote) { - while (len--) { - if (*ch == '\'') { + for (char ch : m_value) { + if (ch == '\'') { useQuotes = true; - } else if (*ch == '"') { + } else if (ch == '"') { useQuotes = false; break; } - ch++; } } else { useQuotes = parent_f_string_quote[0] == '"'; } - ch = str->value(); - len = str->length(); // Output the string if (!parent_f_string_quote) { @@ -94,42 +111,41 @@ void OutputString(std::ostream &pyc_output, PycRef str, char prefix, else pyc_output << (useQuotes ? '"' : '\''); } - while (len--) { - if ((unsigned char)(*ch) < 0x20 || *ch == 0x7F) { - if (*ch == '\r') { + for (char ch : m_value) { + if (static_cast(ch) < 0x20 || ch == 0x7F) { + if (ch == '\r') { pyc_output << "\\r"; - } else if (*ch == '\n') { + } else if (ch == '\n') { if (triple) pyc_output << '\n'; else pyc_output << "\\n"; - } else if (*ch == '\t') { + } else if (ch == '\t') { pyc_output << "\\t"; } else { - formatted_print(pyc_output, "\\x%02x", (*ch & 0xFF)); + formatted_print(pyc_output, "\\x%02x", (ch & 0xFF)); } - } else if ((unsigned char)(*ch) >= 0x80) { - if (str->type() == PycObject::TYPE_UNICODE) { + } else if (static_cast(ch) >= 0x80) { + if (type() == TYPE_UNICODE) { // Unicode stored as UTF-8... Let the stream interpret it - pyc_output << *ch; + pyc_output << ch; } else { - formatted_print(pyc_output, "\\x%x", (*ch & 0xFF)); + formatted_print(pyc_output, "\\x%x", (ch & 0xFF)); } } else { - if (!useQuotes && *ch == '\'') + if (!useQuotes && ch == '\'') pyc_output << R"(\')"; - else if (useQuotes && *ch == '"') + else if (useQuotes && ch == '"') pyc_output << R"(\")"; - else if (*ch == '\\') + else if (ch == '\\') pyc_output << R"(\\)"; - else if (parent_f_string_quote && *ch == '{') + else if (parent_f_string_quote && ch == '{') pyc_output << "{{"; - else if (parent_f_string_quote && *ch == '}') + else if (parent_f_string_quote && ch == '}') pyc_output << "}}"; else - pyc_output << *ch; + pyc_output << ch; } - ch++; } if (!parent_f_string_quote) { if (triple) diff --git a/pyc_string.h b/pyc_string.h index c01e3c9..43ae2ed 100644 --- a/pyc_string.h +++ b/pyc_string.h @@ -27,11 +27,11 @@ public: void setValue(std::string str) { m_value = std::move(str); } + void print(std::ostream& stream, class PycModule* mod, bool triple = false, + const char* parent_f_string_quote = nullptr); + private: std::string m_value; }; -void OutputString(std::ostream& stream, PycRef str, char prefix, - bool triple = false, const char* parent_f_string_quote = nullptr); - #endif diff --git a/pycdas.cpp b/pycdas.cpp index bebf231..1275f00 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -153,25 +153,14 @@ void output_object(PycRef obj, PycModule* mod, int indent, } break; case PycObject::TYPE_STRING: - iputs(pyc_output, indent, ""); - OutputString(pyc_output, obj.cast(), mod->strIsUnicode() ? 'b' : 0); - pyc_output << "\n"; - break; case PycObject::TYPE_UNICODE: - iputs(pyc_output, indent, ""); - OutputString(pyc_output, obj.cast(), mod->strIsUnicode() ? 0 : 'u'); - pyc_output << "\n"; - break; case PycObject::TYPE_INTERNED: case PycObject::TYPE_ASCII: case PycObject::TYPE_ASCII_INTERNED: case PycObject::TYPE_SHORT_ASCII: case PycObject::TYPE_SHORT_ASCII_INTERNED: iputs(pyc_output, indent, ""); - if (mod->majorVer() >= 3) - OutputString(pyc_output, obj.cast(), 0); - else - OutputString(pyc_output, obj.cast(), mod->strIsUnicode() ? 'b' : 0); + obj.cast()->print(pyc_output, mod); pyc_output << "\n"; break; case PycObject::TYPE_TUPLE: