Print unicode as default in Python 2.x when CO_FUTURE_UNICODE_LITERALS is set.

Fixes #141.
This commit is contained in:
Michael Hansen
2019-10-06 14:34:24 -07:00
parent 892616b560
commit 8014ac2b14
7 changed files with 37 additions and 18 deletions

View File

@@ -2777,17 +2777,25 @@ bool print_docstring(PycRef<PycObject> obj, int indent, PycModule* mod)
{
// docstrings are translated from the bytecode __doc__ = 'string' to simply '''string'''
signed char prefix = -1;
if (obj.type() == PycObject::TYPE_STRING)
prefix = mod->majorVer() == 3 ? 'b' : 0;
else if (obj.type() == PycObject::TYPE_UNICODE)
prefix = mod->majorVer() == 3 ? 0 : 'u';
else if (obj.type() == PycObject::TYPE_INTERNED ||
obj.type() == PycObject::TYPE_STRINGREF ||
obj.type() == PycObject::TYPE_ASCII ||
obj.type() == PycObject::TYPE_ASCII_INTERNED ||
obj.type() == PycObject::TYPE_SHORT_ASCII ||
obj.type() == PycObject::TYPE_SHORT_ASCII_INTERNED)
prefix = 0;
switch (obj.type()) {
case PycObject::TYPE_STRING:
prefix = mod->strIsUnicode() ? 'b' : 0;
break;
case PycObject::TYPE_UNICODE:
prefix = mod->strIsUnicode() ? 0 : 'u';
break;
case PycObject::TYPE_STRINGREF:
case PycObject::TYPE_INTERNED:
case PycObject::TYPE_ASCII:
case PycObject::TYPE_ASCII_INTERNED:
case PycObject::TYPE_SHORT_ASCII:
case PycObject::TYPE_SHORT_ASCII_INTERNED:
if (mod->majorVer() >= 3)
prefix = 0;
else
prefix = mod->strIsUnicode() ? 'b' : 0;
break;
}
if (prefix != -1) {
start_line(indent);
OutputString(obj.cast<PycString>(), prefix, true);

View File

@@ -151,10 +151,10 @@ void print_const(PycRef<PycObject> obj, PycModule* mod)
switch (obj->type()) {
case PycObject::TYPE_STRING:
OutputString(obj.cast<PycString>(), (mod->majorVer() == 3) ? 'b' : 0);
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
break;
case PycObject::TYPE_UNICODE:
OutputString(obj.cast<PycString>(), (mod->majorVer() == 3) ? 0 : 'u');
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 0 : 'u');
break;
case PycObject::TYPE_STRINGREF:
case PycObject::TYPE_INTERNED:
@@ -162,7 +162,10 @@ void print_const(PycRef<PycObject> obj, PycModule* mod)
case PycObject::TYPE_ASCII_INTERNED:
case PycObject::TYPE_SHORT_ASCII:
case PycObject::TYPE_SHORT_ASCII_INTERNED:
OutputString(obj.cast<PycString>(), 0);
if (mod->majorVer() >= 3)
OutputString(obj.cast<PycString>(), 0);
else
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
break;
case PycObject::TYPE_TUPLE:
case PycObject::TYPE_SMALL_TUPLE:

View File

@@ -53,6 +53,11 @@ public:
bool isUnicode() const { return m_unicode; }
// Reports whether string literals should be treated as unicode by default.
// True when m_maj is 3 or greater, or when the module's code object carries
// the CO_FUTURE_UNICODE_LITERALS flag. Callers use the result to choose
// between a 'b'/'u' prefix and no prefix when printing string constants.
// NOTE(review): m_maj is presumably the Python major version -- confirm.
bool strIsUnicode() const
{
    // Checked first so m_code is only consulted when m_maj is below 3.
    if (m_maj >= 3)
        return true;

    const bool hasFutureUnicodeLiterals =
        (m_code->flags() & PycCode::CO_FUTURE_UNICODE_LITERALS) != 0;
    return hasFutureUnicodeLiterals;
}
PycRef<PycCode> code() const { return m_code; }
void intern(PycRef<PycString> str) { m_interns.push_back(str); }

View File

@@ -129,12 +129,12 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
break;
case PycObject::TYPE_STRING:
iputs(indent, "");
OutputString(obj.cast<PycString>(), (mod->majorVer() == 3) ? 'b' : 0);
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
fputs("\n", pyc_output);
break;
case PycObject::TYPE_UNICODE:
iputs(indent, "");
OutputString(obj.cast<PycString>(), (mod->majorVer() == 3) ? 0 : 'u');
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 0 : 'u');
fputs("\n", pyc_output);
break;
case PycObject::TYPE_STRINGREF:
@@ -144,7 +144,10 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
case PycObject::TYPE_SHORT_ASCII:
case PycObject::TYPE_SHORT_ASCII_INTERNED:
iputs(indent, "");
OutputString(obj.cast<PycString>(), 0);
if (mod->majorVer() >= 3)
OutputString(obj.cast<PycString>(), 0);
else
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
fputs("\n", pyc_output);
break;
case PycObject::TYPE_TUPLE:

View File

@@ -110,7 +110,7 @@ RE_WHITESPACE = re.compile(r'\s+')
RE_WORD = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')
RE_INT = re.compile(r'[0-9][0-9_]*|0[Xx][0-9A-Fa-f_]+|0[Bb][0-1_]+|0[Oo][0-7_]+')
RE_FLOAT = re.compile(r'(([0-9][0-9_]*)?\.[0-9][0-9_]*|[0-9][0-9_]*\.)([eE][+-]?[0-9][0-9_]*)?')
RE_START_STRING = re.compile(r'([rR][fFbB]?|[uU]|[fF][rR]?|[bB][rR]+)?(\'\'\'|\'|"""|")')
RE_START_STRING = re.compile(r'([rR][fFbB]?|[uU]|[fF][rR]?|[bB][rR]?)?(\'\'\'|\'|"""|")')
# Note, tokens sharing a common prefix should be entered in order from
# longest to shortest, so we don't mismatch a long token as a sequence

Binary file not shown.

Binary file not shown.