2011-10-23 17:48:10 -07:00
|
|
|
#include "pyc_string.h"
|
|
|
|
#include "pyc_module.h"
|
2009-07-24 08:35:21 +00:00
|
|
|
#include "data.h"
|
2022-11-30 15:05:29 -08:00
|
|
|
#include <stdexcept>
|
2009-07-24 08:35:21 +00:00
|
|
|
|
2022-11-30 15:05:29 -08:00
|
|
|
/**
 * Report whether every byte of `data` is 7-bit ASCII.
 *
 * The full buffer (data.size() bytes) is examined rather than stopping at the
 * first NUL terminator, so bytes that follow an embedded NUL are validated as
 * well.  An empty string is trivially ASCII.
 *
 * @param data  Raw string bytes to validate.
 * @return true when no byte has its high bit (0x80) set, false otherwise.
 */
static bool check_ascii(const std::string& data)
{
    for (const char ch : data) {
        // Test through unsigned char so the high-bit check does not depend on
        // whether plain char is signed on this platform.
        if (static_cast<unsigned char>(ch) & 0x80)
            return false;
    }
    return true;
}
|
|
|
|
|
2009-07-24 08:35:21 +00:00
|
|
|
/* PycString */
|
|
|
|
/*
 * Fill in this string object from `stream`.
 *
 * - TYPE_STRINGREF: a 32-bit value is read and handed to mod->getIntern();
 *   the type and contents of the string it returns are copied into this
 *   object.
 * - Every other type: a length is read (one byte for the SHORT_ASCII
 *   variants, a 32-bit value otherwise) followed by that many bytes of
 *   payload.  The ASCII-flavoured types are rejected if check_ascii() fails
 *   on the payload, and the *_INTERNED / TYPE_INTERNED types are passed to
 *   mod->intern() afterwards.
 *
 * Throws std::bad_alloc when the length read is negative and
 * std::runtime_error("Invalid bytes in ASCII string") when an ASCII-typed
 * payload fails validation.
 */
void PycString::load(PycData* stream, PycModule* mod)
{
    if (type() == TYPE_STRINGREF) {
        PycRef<PycString> interned = mod->getIntern(stream->get32());
        m_type = interned->m_type;
        m_value = interned->m_value;
        return;
    }

    // Classify the type up front; nothing below modifies it.
    const bool shortForm = type() == TYPE_SHORT_ASCII
                        || type() == TYPE_SHORT_ASCII_INTERNED;
    const bool asciiOnly = type() == TYPE_ASCII
                        || type() == TYPE_ASCII_INTERNED
                        || shortForm;
    const bool wantsIntern = type() == TYPE_INTERNED
                          || type() == TYPE_ASCII_INTERNED
                          || type() == TYPE_SHORT_ASCII_INTERNED;

    const int byteCount = shortForm ? stream->getByte() : stream->get32();
    if (byteCount < 0)
        throw std::bad_alloc();

    m_value.resize(byteCount);
    if (byteCount != 0) {
        // front() is only valid on a non-empty string, hence the guard.
        stream->getBuffer(byteCount, &m_value.front());
        if (asciiOnly && !check_ascii(m_value))
            throw std::runtime_error("Invalid bytes in ASCII string");
    }

    if (wantsIntern)
        mod->intern(this);
}
|
2009-07-24 19:52:47 +00:00
|
|
|
|
2009-07-24 21:15:51 +00:00
|
|
|
/*
 * Compare this string with an arbitrary object reference.
 *
 * Objects whose type code differs from this one's are never equal.  When the
 * type codes match, `obj` is cast to a PycString and the comparison is
 * delegated to the isEqual() overload that takes the other string's value.
 */
bool PycString::isEqual(PycRef<PycObject> obj) const
{
    if (type() == obj.type()) {
        PycRef<PycString> other = obj.cast<PycString>();
        return isEqual(other->m_value);
    }
    return false;
}
|
|
|
|
|
2023-06-09 09:09:03 -07:00
|
|
|
void PycString::print(std::ostream &pyc_output, PycModule* mod, bool triple,
|
|
|
|
const char* parent_f_string_quote)
|
2009-07-24 19:52:47 +00:00
|
|
|
{
|
2023-06-09 09:09:03 -07:00
|
|
|
char prefix = 0;
|
|
|
|
switch (type()) {
|
|
|
|
case TYPE_STRING:
|
|
|
|
prefix = mod->strIsUnicode() ? 'b' : 0;
|
|
|
|
break;
|
|
|
|
case PycObject::TYPE_UNICODE:
|
|
|
|
prefix = mod->strIsUnicode() ? 0 : 'u';
|
|
|
|
break;
|
|
|
|
case PycObject::TYPE_INTERNED:
|
|
|
|
case PycObject::TYPE_ASCII:
|
|
|
|
case PycObject::TYPE_ASCII_INTERNED:
|
|
|
|
case PycObject::TYPE_SHORT_ASCII:
|
|
|
|
case PycObject::TYPE_SHORT_ASCII_INTERNED:
|
|
|
|
if (mod->majorVer() >= 3)
|
|
|
|
prefix = 0;
|
|
|
|
else
|
|
|
|
prefix = mod->strIsUnicode() ? 'b' : 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
throw std::runtime_error("Invalid string type");
|
|
|
|
}
|
|
|
|
|
2009-08-03 23:13:50 +00:00
|
|
|
if (prefix != 0)
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << prefix;
|
2009-08-03 23:13:50 +00:00
|
|
|
|
2023-06-09 09:09:03 -07:00
|
|
|
if (m_value.empty()) {
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << "''";
|
2009-07-24 19:52:47 +00:00
|
|
|
return;
|
2009-08-03 23:13:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Determine preferred quote style (Emulate Python's method)
|
|
|
|
bool useQuotes = false;
|
2020-10-17 21:04:39 +11:00
|
|
|
if (!parent_f_string_quote) {
|
2023-06-09 09:09:03 -07:00
|
|
|
for (char ch : m_value) {
|
|
|
|
if (ch == '\'') {
|
2020-10-17 20:52:57 +11:00
|
|
|
useQuotes = true;
|
2023-06-09 09:09:03 -07:00
|
|
|
} else if (ch == '"') {
|
2020-10-17 20:52:57 +11:00
|
|
|
useQuotes = false;
|
|
|
|
break;
|
|
|
|
}
|
2009-08-03 23:13:50 +00:00
|
|
|
}
|
2020-10-20 21:08:02 -07:00
|
|
|
} else {
|
2020-10-17 20:52:57 +11:00
|
|
|
useQuotes = parent_f_string_quote[0] == '"';
|
2009-08-03 23:13:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Output the string
|
2020-10-17 20:52:57 +11:00
|
|
|
if (!parent_f_string_quote) {
|
|
|
|
if (triple)
|
2023-06-06 11:20:32 -07:00
|
|
|
pyc_output << (useQuotes ? R"(""")" : "'''");
|
2020-10-17 20:52:57 +11:00
|
|
|
else
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << (useQuotes ? '"' : '\'');
|
2020-10-17 20:52:57 +11:00
|
|
|
}
|
2023-06-09 09:09:03 -07:00
|
|
|
for (char ch : m_value) {
|
|
|
|
if (static_cast<unsigned char>(ch) < 0x20 || ch == 0x7F) {
|
|
|
|
if (ch == '\r') {
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << "\\r";
|
2023-06-09 09:09:03 -07:00
|
|
|
} else if (ch == '\n') {
|
2009-08-03 23:13:50 +00:00
|
|
|
if (triple)
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << '\n';
|
2009-07-24 19:52:47 +00:00
|
|
|
else
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << "\\n";
|
2023-06-09 09:09:03 -07:00
|
|
|
} else if (ch == '\t') {
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << "\\t";
|
2009-07-24 19:52:47 +00:00
|
|
|
} else {
|
2023-06-09 09:09:03 -07:00
|
|
|
formatted_print(pyc_output, "\\x%02x", (ch & 0xFF));
|
2009-07-24 19:52:47 +00:00
|
|
|
}
|
2023-06-09 09:09:03 -07:00
|
|
|
} else if (static_cast<unsigned char>(ch) >= 0x80) {
|
|
|
|
if (type() == TYPE_UNICODE) {
|
2009-07-25 00:02:31 +00:00
|
|
|
// Unicode stored as UTF-8... Let the stream interpret it
|
2023-06-09 09:09:03 -07:00
|
|
|
pyc_output << ch;
|
2009-07-25 00:02:31 +00:00
|
|
|
} else {
|
2023-06-09 09:09:03 -07:00
|
|
|
formatted_print(pyc_output, "\\x%x", (ch & 0xFF));
|
2009-07-25 00:02:31 +00:00
|
|
|
}
|
2009-07-24 19:52:47 +00:00
|
|
|
} else {
|
2023-06-09 09:09:03 -07:00
|
|
|
if (!useQuotes && ch == '\'')
|
2023-06-06 11:20:32 -07:00
|
|
|
pyc_output << R"(\')";
|
2023-06-09 09:09:03 -07:00
|
|
|
else if (useQuotes && ch == '"')
|
2023-06-06 11:20:32 -07:00
|
|
|
pyc_output << R"(\")";
|
2023-06-09 09:09:03 -07:00
|
|
|
else if (ch == '\\')
|
2023-06-06 11:20:32 -07:00
|
|
|
pyc_output << R"(\\)";
|
2023-06-09 09:09:03 -07:00
|
|
|
else if (parent_f_string_quote && ch == '{')
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << "{{";
|
2023-06-09 09:09:03 -07:00
|
|
|
else if (parent_f_string_quote && ch == '}')
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << "}}";
|
2009-07-24 19:52:47 +00:00
|
|
|
else
|
2023-06-09 09:09:03 -07:00
|
|
|
pyc_output << ch;
|
2009-07-24 19:52:47 +00:00
|
|
|
}
|
|
|
|
}
|
2020-10-17 20:52:57 +11:00
|
|
|
if (!parent_f_string_quote) {
|
2020-10-17 21:04:39 +11:00
|
|
|
if (triple)
|
2023-06-06 11:20:32 -07:00
|
|
|
pyc_output << (useQuotes ? R"(""")" : "'''");
|
2020-10-17 21:04:39 +11:00
|
|
|
else
|
2023-06-02 00:36:58 +02:00
|
|
|
pyc_output << (useQuotes ? '"' : '\'');
|
2020-10-17 21:04:39 +11:00
|
|
|
}
|
2009-07-24 19:52:47 +00:00
|
|
|
}
|