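Treat unicode as the default string type in Python 2.x output when
CO_FUTURE_UNICODE_LITERALS is set (unicode strings are printed without a 'u' prefix and byte strings with a 'b' prefix). Fixes #141.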
This commit is contained in:
30
ASTree.cpp
30
ASTree.cpp
@@ -2777,17 +2777,25 @@ bool print_docstring(PycRef<PycObject> obj, int indent, PycModule* mod)
|
||||
{
|
||||
// docstrings are translated from the bytecode __doc__ = 'string' to simply '''string'''
|
||||
signed char prefix = -1;
|
||||
if (obj.type() == PycObject::TYPE_STRING)
|
||||
prefix = mod->majorVer() == 3 ? 'b' : 0;
|
||||
else if (obj.type() == PycObject::TYPE_UNICODE)
|
||||
prefix = mod->majorVer() == 3 ? 0 : 'u';
|
||||
else if (obj.type() == PycObject::TYPE_INTERNED ||
|
||||
obj.type() == PycObject::TYPE_STRINGREF ||
|
||||
obj.type() == PycObject::TYPE_ASCII ||
|
||||
obj.type() == PycObject::TYPE_ASCII_INTERNED ||
|
||||
obj.type() == PycObject::TYPE_SHORT_ASCII ||
|
||||
obj.type() == PycObject::TYPE_SHORT_ASCII_INTERNED)
|
||||
prefix = 0;
|
||||
switch (obj.type()) {
|
||||
case PycObject::TYPE_STRING:
|
||||
prefix = mod->strIsUnicode() ? 'b' : 0;
|
||||
break;
|
||||
case PycObject::TYPE_UNICODE:
|
||||
prefix = mod->strIsUnicode() ? 0 : 'u';
|
||||
break;
|
||||
case PycObject::TYPE_STRINGREF:
|
||||
case PycObject::TYPE_INTERNED:
|
||||
case PycObject::TYPE_ASCII:
|
||||
case PycObject::TYPE_ASCII_INTERNED:
|
||||
case PycObject::TYPE_SHORT_ASCII:
|
||||
case PycObject::TYPE_SHORT_ASCII_INTERNED:
|
||||
if (mod->majorVer() >= 3)
|
||||
prefix = 0;
|
||||
else
|
||||
prefix = mod->strIsUnicode() ? 'b' : 0;
|
||||
break;
|
||||
}
|
||||
if (prefix != -1) {
|
||||
start_line(indent);
|
||||
OutputString(obj.cast<PycString>(), prefix, true);
|
||||
|
@@ -151,10 +151,10 @@ void print_const(PycRef<PycObject> obj, PycModule* mod)
|
||||
|
||||
switch (obj->type()) {
|
||||
case PycObject::TYPE_STRING:
|
||||
OutputString(obj.cast<PycString>(), (mod->majorVer() == 3) ? 'b' : 0);
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
|
||||
break;
|
||||
case PycObject::TYPE_UNICODE:
|
||||
OutputString(obj.cast<PycString>(), (mod->majorVer() == 3) ? 0 : 'u');
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 0 : 'u');
|
||||
break;
|
||||
case PycObject::TYPE_STRINGREF:
|
||||
case PycObject::TYPE_INTERNED:
|
||||
@@ -162,7 +162,10 @@ void print_const(PycRef<PycObject> obj, PycModule* mod)
|
||||
case PycObject::TYPE_ASCII_INTERNED:
|
||||
case PycObject::TYPE_SHORT_ASCII:
|
||||
case PycObject::TYPE_SHORT_ASCII_INTERNED:
|
||||
OutputString(obj.cast<PycString>(), 0);
|
||||
if (mod->majorVer() >= 3)
|
||||
OutputString(obj.cast<PycString>(), 0);
|
||||
else
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
|
||||
break;
|
||||
case PycObject::TYPE_TUPLE:
|
||||
case PycObject::TYPE_SMALL_TUPLE:
|
||||
|
@@ -53,6 +53,11 @@ public:
|
||||
|
||||
bool isUnicode() const { return m_unicode; }
|
||||
|
||||
// Returns true when m_maj >= 3, or when the CO_FUTURE_UNICODE_LITERALS bit is
// set in m_code->flags(). Callers use the result to choose a string literal
// prefix: 'b' vs. none for TYPE_STRING, none vs. 'u' for TYPE_UNICODE.
bool strIsUnicode() const
|
||||
{
|
||||
return (m_maj >= 3) || (m_code->flags() & PycCode::CO_FUTURE_UNICODE_LITERALS) != 0;
|
||||
}
|
||||
|
||||
PycRef<PycCode> code() const { return m_code; }
|
||||
|
||||
void intern(PycRef<PycString> str) { m_interns.push_back(str); }
|
||||
|
@@ -129,12 +129,12 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
|
||||
break;
|
||||
case PycObject::TYPE_STRING:
|
||||
iputs(indent, "");
|
||||
OutputString(obj.cast<PycString>(), (mod->majorVer() == 3) ? 'b' : 0);
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
|
||||
fputs("\n", pyc_output);
|
||||
break;
|
||||
case PycObject::TYPE_UNICODE:
|
||||
iputs(indent, "");
|
||||
OutputString(obj.cast<PycString>(), (mod->majorVer() == 3) ? 0 : 'u');
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 0 : 'u');
|
||||
fputs("\n", pyc_output);
|
||||
break;
|
||||
case PycObject::TYPE_STRINGREF:
|
||||
@@ -144,7 +144,10 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent)
|
||||
case PycObject::TYPE_SHORT_ASCII:
|
||||
case PycObject::TYPE_SHORT_ASCII_INTERNED:
|
||||
iputs(indent, "");
|
||||
OutputString(obj.cast<PycString>(), 0);
|
||||
if (mod->majorVer() >= 3)
|
||||
OutputString(obj.cast<PycString>(), 0);
|
||||
else
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
|
||||
fputs("\n", pyc_output);
|
||||
break;
|
||||
case PycObject::TYPE_TUPLE:
|
||||
|
@@ -110,7 +110,7 @@ RE_WHITESPACE = re.compile(r'\s+')
|
||||
RE_WORD = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')
|
||||
RE_INT = re.compile(r'[0-9][0-9_]*|0[Xx][0-9A-Fa-f_]+|0[Bb][0-1_]+|0[Oo][0-7_]+')
|
||||
RE_FLOAT = re.compile(r'(([0-9][0-9_]*)?\.[0-9][0-9_]*|[0-9][0-9_]*\.)([eE][+-]?[0-9][0-9_]*)?')
|
||||
RE_START_STRING = re.compile(r'([rR][fFbB]?|[uU]|[fF][rR]?|[bB][rR]+)?(\'\'\'|\'|"""|")')
|
||||
RE_START_STRING = re.compile(r'([rR][fFbB]?|[uU]|[fF][rR]?|[bB][rR]?)?(\'\'\'|\'|"""|")')
|
||||
|
||||
# Note, tokens sharing a common prefix should be entered in order from
|
||||
# longest to shortest, so we don't mismatch a long token as a sequence
|
||||
|
BIN
tests/compiled/unicode_future.2.6.pyc
Normal file
BIN
tests/compiled/unicode_future.2.6.pyc
Normal file
Binary file not shown.
BIN
tests/compiled/unicode_future.2.7.pyc
Normal file
BIN
tests/compiled/unicode_future.2.7.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user