2011-10-23 17:48:10 -07:00
|
|
|
#include "pyc_string.h"
|
|
|
|
#include "pyc_module.h"
|
2009-07-24 08:35:21 +00:00
|
|
|
#include "data.h"
|
|
|
|
#include <cstring>
|
|
|
|
|
2014-01-21 00:07:34 -08:00
|
|
|
/* Re-encode a NUL-terminated single-byte string as UTF-8, in place.
 *
 * *data must point to a buffer allocated with new char[]. Bytes below 0x80
 * are copied unchanged; every byte with the high bit set is written as the
 * two-byte UTF-8 sequence for the code point of the same numeric value.
 *
 * When no byte before the terminator has the high bit set, the buffer is
 * left untouched. Otherwise a new buffer is allocated, the old one is
 * released with delete[], and *data is pointed at the new buffer.
 */
static void ascii_to_utf8(char** data)
{
    const unsigned char* const src =
            reinterpret_cast<const unsigned char*>(*data);

    // First pass: measure the input and count the bytes that will expand.
    size_t srcLen = 0;
    size_t highBytes = 0;
    for (; src[srcLen] != 0; ++srcLen) {
        if (src[srcLen] >= 0x80)
            ++highBytes;
    }

    // Pure 7-bit input is already valid UTF-8; keep the existing buffer.
    if (highBytes == 0)
        return;

    // Each high byte takes one extra output byte.
    const size_t outLen = srcLen + highBytes;
    char* converted = new char[outLen + 1];
    unsigned char* out = reinterpret_cast<unsigned char*>(converted);

    // Second pass: emit one or two bytes per input byte.
    for (size_t i = 0; i < srcLen; ++i) {
        const unsigned char byte = src[i];
        if (byte >= 0x80) {
            *out++ = static_cast<unsigned char>(0xC0 | (byte >> 6));
            *out++ = static_cast<unsigned char>(0x80 | (byte & 0x3F));
        } else {
            *out++ = byte;
        }
    }
    *out = 0;

    delete[] *data;
    *data = converted;
}
|
|
|
|
|
2009-07-24 08:35:21 +00:00
|
|
|
/* PycString */
|
|
|
|
/* Load this string's contents from a marshalled stream.
 *
 * TYPE_STRINGREF objects read a 32-bit index from the stream and copy the
 * bytes of the string that mod->getIntern() returns for it. Every other
 * string type reads a length (one byte for the SHORT_ASCII types, 32 bits
 * otherwise) followed by that many raw bytes. The ASCII types are then passed
 * through ascii_to_utf8(), and the interned types are registered with
 * mod->intern().
 *
 * A zero-length string is stored as a null m_value. Any previously held
 * buffer is released first.
 */
void PycString::load(PycData* stream, PycModule* mod)
{
    // Drop the old buffer and clear the pointer immediately, so the object
    // never holds a dangling pointer if an allocation or read below throws.
    delete[] m_value;
    m_value = 0;

    if (type() == TYPE_STRINGREF) {
        PycRef<PycString> str = mod->getIntern(stream->get32());
        m_length = str->length();
        if (m_length) {
            m_value = new char[m_length+1];
            memcpy(m_value, str->value(), m_length);
            m_value[m_length] = 0;
        }
    } else {
        if (type() == TYPE_SHORT_ASCII || type() == TYPE_SHORT_ASCII_INTERNED)
            m_length = stream->getByte();
        else
            m_length = stream->get32();

        if (m_length) {
            m_value = new char[m_length+1];
            stream->getBuffer(m_length, m_value);
            m_value[m_length] = 0;

            if (type() == TYPE_ASCII || type() == TYPE_ASCII_INTERNED ||
                    type() == TYPE_SHORT_ASCII || type() == TYPE_SHORT_ASCII_INTERNED) {
                // ascii_to_utf8() replaces the buffer exactly when a byte
                // before the first NUL has its high bit set. Detect that
                // case up front so the stored length can follow the buffer.
                bool reencoded = false;
                for (const char* p = m_value; *p; ++p) {
                    if (*p & 0x80) {
                        reencoded = true;
                        break;
                    }
                }

                ascii_to_utf8(&m_value);

                // The new buffer is NUL-terminated and has a different size
                // from the raw data; keep m_length equal to the number of
                // bytes actually held so readers neither truncate the value
                // nor run past the end of the allocation.
                if (reencoded)
                    m_length = static_cast<int>(strlen(m_value));
            }
        }

        if (type() == TYPE_INTERNED || type() == TYPE_ASCII_INTERNED ||
                type() == TYPE_SHORT_ASCII_INTERNED)
            mod->intern(this);
    }
}
|
2009-07-24 19:52:47 +00:00
|
|
|
|
2009-07-24 21:15:51 +00:00
|
|
|
/* Compare against another object: equal only when the other object reports
 * the same type() as this string and its character data matches according
 * to isEqual(const char*). */
bool PycString::isEqual(PycRef<PycObject> obj) const
{
    // The cast is evaluated only once the type tags are known to match.
    return type() == obj.type()
        && isEqual(obj.cast<PycString>()->m_value);
}
|
|
|
|
|
2009-07-27 00:23:49 +00:00
|
|
|
bool PycString::isEqual(const char* str) const
|
|
|
|
{
|
|
|
|
if (m_value == str)
|
|
|
|
return true;
|
|
|
|
return (strcmp(m_value, str) == 0);
|
|
|
|
}
|
2009-07-24 19:52:47 +00:00
|
|
|
|
2009-08-03 23:13:50 +00:00
|
|
|
/* Write a string object to F as Python string-literal source text.
 *
 *   str    - string to print; a null value() is printed as ''.
 *   prefix - literal prefix character written before the opening quote,
 *            or 0 for none.
 *   triple - use triple-quote delimiters; newlines are then written
 *            literally instead of as \n.
 *   F      - destination stream.
 *
 * Double quotes are chosen as the delimiter when the string contains a
 * single quote and no double quote is seen; otherwise single quotes are
 * used. Control bytes and 0x7F are escaped. Bytes >= 0x80 are passed through
 * unchanged for TYPE_UNICODE strings (their data is treated as UTF-8) and
 * written as \xNN for every other string type.
 */
void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F)
{
    if (prefix != 0)
        fputc(prefix, F);

    const char* ch = str->value();
    const int len = str->length();
    if (ch == 0) {
        fputs("''", F);
        return;
    }

    // Determine preferred quote style (Emulate Python's method)
    bool useQuotes = false;
    for (int i = 0; i < len; ++i) {
        if (ch[i] == '\'') {
            useQuotes = true;
        } else if (ch[i] == '"') {
            useQuotes = false;
            break;
        }
    }

    // Output the string
    if (triple)
        fputs(useQuotes ? "\"\"\"" : "'''", F);
    else
        fputc(useQuotes ? '"' : '\'', F);

    for (int i = 0; i < len; ++i) {
        // Classify on the unsigned value: plain char may be signed, in which
        // case bytes >= 0x80 are negative and would be mistaken for control
        // characters, so the high-byte branch below would never be reached.
        const unsigned char byte = static_cast<unsigned char>(ch[i]);

        if (byte < 0x20 || byte == 0x7F) {
            if (byte == '\r') {
                fputs("\\r", F);
            } else if (byte == '\n') {
                if (triple)
                    fputc('\n', F);
                else
                    fputs("\\n", F);
            } else if (byte == '\t') {
                fputs("\\t", F);
            } else {
                fprintf(F, "\\x%02x", static_cast<unsigned int>(byte));
            }
        } else if (byte >= 0x80) {
            if (str->type() == PycObject::TYPE_UNICODE) {
                // Unicode stored as UTF-8... Let the stream interpret it
                fputc(byte, F);
            } else {
                fprintf(F, "\\x%02x", static_cast<unsigned int>(byte));
            }
        } else {
            if (!useQuotes && byte == '\'')
                fputs("\\'", F);
            else if (useQuotes && byte == '"')
                fputs("\\\"", F);
            else if (byte == '\\')
                fputs("\\\\", F);
            else
                fputc(byte, F);
        }
    }

    if (triple)
        fputs(useQuotes ? "\"\"\"" : "'''", F);
    else
        fputc(useQuotes ? '"' : '\'', F);
}
|