Issue-165 Added support for f-strings (literal string interpolation https://www.python.org/dev/peps/pep-0498/)

Opcodes handled: FORMAT_VALUE, BUILD_STRING.
Added AST node classes for FormattedValue and JoinedStr.
This commit is contained in:
Aralox
2020-10-17 20:52:57 +11:00
parent 9407b29451
commit 0c9fbd9caf
11 changed files with 257 additions and 24 deletions

1
.gitignore vendored
View File

@@ -4,3 +4,4 @@
*.gcda
*.kdev4
/.kdev4
__pycache__

View File

@@ -79,3 +79,11 @@ const char* ASTBlock::type_str() const
};
return s_type_strings[blktype()];
}
/* ASTFormattedValue */
// Quote sequence written before and after every decompiled f-string.
// This must be a triple quote (''' or """), to handle interpolated string literals containing the opposite quote style.
// E.g. f'''{"interpolated "123' literal"}''' -> valid.
// E.g. f"""{"interpolated "123' literal"}""" -> valid.
// E.g. f'{"interpolated "123' literal"}' -> invalid, unescaped quotes in literal.
// E.g. f'{"interpolated \"123\' literal"}' -> invalid, f-string expression does not allow backslash.
// NOTE: Nested f-strings are not supported.
const char* ASTFormattedValue::F_STRING_QUOTE = "'''";

View File

@@ -15,6 +15,7 @@ public:
NODE_TUPLE, NODE_LIST, NODE_MAP, NODE_SUBSCR, NODE_PRINT,
NODE_CONVERT, NODE_KEYWORD, NODE_RAISE, NODE_EXEC, NODE_BLOCK,
NODE_COMPREHENSION, NODE_LOADBUILDCLASS, NODE_AWAITABLE,
NODE_FORMATTEDVALUE, NODE_JOINEDSTR,
// Empty node types
NODE_LOCALS,
@@ -611,4 +612,47 @@ private:
PycRef<ASTNode> m_expr;
};
/*
 * AST node for one replacement field of an f-string: the interpolated
 * expression, the conversion flag taken from the FORMAT_VALUE operand and
 * an optional format spec node (null when the field has none).
 */
class ASTFormattedValue : public ASTNode {
public:
    /* Quote sequence used when an f-string is printed (defined out of line). */
    static const char* F_STRING_QUOTE;

    /* Numeric values match the FORMAT_VALUE operand they are cast from. */
    enum ConversionFlag {
        NONE = 0,       /* printed without a conversion suffix */
        STR = 1,        /* printed as "!s" */
        REPR = 2,       /* printed as "!r" */
        ASCII = 3,      /* printed as "!a" */
        FMTSPEC = 4     /* a format spec follows, printed as ":spec" */
    };

    ASTFormattedValue(PycRef<ASTNode> val, ConversionFlag conversion,
                      PycRef<ASTNode> format_spec)
        : ASTNode(NODE_FORMATTEDVALUE),
          m_val(std::move(val)),
          m_conversion(conversion),
          m_format_spec(std::move(format_spec))
    { }

    /* The interpolated expression. */
    PycRef<ASTNode> val() const { return m_val; }

    /* The conversion flag recorded at construction. */
    ConversionFlag conversion() const { return m_conversion; }

    /* The format spec node; may be null. */
    PycRef<ASTNode> format_spec() const { return m_format_spec; }

private:
    PycRef<ASTNode> m_val;
    ConversionFlag m_conversion;
    PycRef<ASTNode> m_format_spec;
};
/*
 * AST node for a complete f-string: an ordered list of pieces.
 * Structurally the same as ASTList.
 */
class ASTJoinedStr : public ASTNode {
public:
    using value_t = std::list<PycRef<ASTNode>>;

    ASTJoinedStr(value_t values)
        : ASTNode(NODE_JOINEDSTR),
          m_values(std::move(values))
    { }

    /* The pieces, in the order they were supplied to the constructor. */
    const value_t& values() const { return m_values; }

private:
    value_t m_values;
};
#endif

View File

@@ -366,6 +366,17 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
}
break;
case Pyc::BUILD_STRING_A:
    {
        // Nearly identical logic to BUILD_LIST: pop `operand` pieces off
        // the stack and wrap them in a single joined-string node.
        // The pieces come off the stack last-first, so push_front restores
        // their source order.
        ASTJoinedStr::value_t pieces;
        for (int i = 0; i < operand; i++) {
            pieces.push_front(stack.top());
            stack.pop();
        }
        // The local list is not needed afterwards; hand it over without copying.
        stack.push(new ASTJoinedStr(std::move(pieces)));
    }
    break;
case Pyc::BUILD_TUPLE_A:
{
ASTTuple::value_t values;
@@ -782,6 +793,35 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(NULL); // We can totally hack this >_>
}
break;
case Pyc::FORMAT_VALUE_A:
    {
        // The operand is a bit field, not an enumeration:
        //   (operand & 0x03) selects the conversion (none, !s, !r, !a);
        //   (operand & 0x04) says a format spec sits on top of the value.
        // Both can be set at once (e.g. f'{x!r:>10}' gives operand 6), so
        // the two parts are decoded independently.
        const int conversion_mask = 0x03;
        const int known_bits = conversion_mask | ASTFormattedValue::ConversionFlag::FMTSPEC;

        if ((operand & ~known_bits) != 0) {
            // Unknown flag bits: the stack layout cannot be trusted, so
            // report the operand and leave the stack untouched.
            fprintf(stderr, "Unsupported FORMAT_VALUE_A conversion flag: %d\n", operand);
        } else {
            PycRef<ASTNode> format_spec = nullptr;
            if ((operand & ASTFormattedValue::ConversionFlag::FMTSPEC) != 0) {
                // The format spec was pushed after the value, so it is popped first.
                format_spec = stack.top();
                stack.pop();
            }
            PycRef<ASTNode> val = stack.top();
            stack.pop();

            // Keep the complete flag word in the node so that no information
            // is lost between decoding and printing.
            auto conversion_flag = static_cast<ASTFormattedValue::ConversionFlag>(operand);
            stack.push(new ASTFormattedValue(val, conversion_flag, format_spec));
        }
    }
    break;
case Pyc::GET_AWAITABLE:
{
PycRef<ASTNode> object = stack.top();
@@ -2277,6 +2317,33 @@ static void print_block(PycRef<ASTBlock> blk, PycModule* mod) {
}
}
void print_formatted_value(PycRef<ASTFormattedValue> formatted_value, PycModule* mod)
{
fputs("{", pyc_output);
print_src(formatted_value->val(), mod);
switch (formatted_value->conversion())
{
case ASTFormattedValue::ConversionFlag::NONE:
break;
case ASTFormattedValue::ConversionFlag::STR:
fputs("!s", pyc_output);
break;
case ASTFormattedValue::ConversionFlag::REPR:
fputs("!r", pyc_output);
break;
case ASTFormattedValue::ConversionFlag::ASCII:
fputs("!a", pyc_output);
break;
case ASTFormattedValue::ConversionFlag::FMTSPEC:
fprintf(pyc_output, ":%s", formatted_value->format_spec().cast<ASTObject>()->object().cast<PycString>()->value());
break;
default:
fprintf(stderr, "Unsupported NODE_FORMATTEDVALUE conversion flag: %d\n", formatted_value->conversion());
}
fputs("}", pyc_output);
}
void print_src(PycRef<ASTNode> node, PycModule* mod)
{
if (node == NULL) {
@@ -2367,6 +2434,31 @@ void print_src(PycRef<ASTNode> node, PycModule* mod)
}
}
break;
case ASTNode::NODE_FORMATTEDVALUE:
    {
        // A replacement field on its own is printed as a complete f-string.
        fputc('f', pyc_output);
        fputs(ASTFormattedValue::F_STRING_QUOTE, pyc_output);
        print_formatted_value(node.cast<ASTFormattedValue>(), mod);
        fputs(ASTFormattedValue::F_STRING_QUOTE, pyc_output);
    }
    break;
case ASTNode::NODE_JOINEDSTR:
    {
        const char* const quote = ASTFormattedValue::F_STRING_QUOTE;

        fputc('f', pyc_output);
        fputs(quote, pyc_output);
        for (const auto& piece : node.cast<ASTJoinedStr>()->values())
        {
            const int piece_type = piece.type();
            if (piece_type == ASTNode::NODE_FORMATTEDVALUE) {
                print_formatted_value(piece.cast<ASTFormattedValue>(), mod);
            } else if (piece_type == ASTNode::NODE_OBJECT) {
                // Literal text between fields: pass the enclosing quote so the
                // piece is written in the same quote style as the f-string.
                // This avoids problems when ''' or """ is part of the string.
                print_const(piece.cast<ASTObject>()->object(), mod, quote);
            } else {
                fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", piece_type);
            }
        }
        fputs(quote, pyc_output);
    }
    break;
case ASTNode::NODE_KEYWORD:
fprintf(pyc_output, "%s", node.cast<ASTKeyword>()->word_str());
break;

View File

@@ -147,7 +147,7 @@ bool Pyc::IsCompareArg(int opcode)
return (opcode == Pyc::COMPARE_OP_A);
}
void print_const(PycRef<PycObject> obj, PycModule* mod)
void print_const(PycRef<PycObject> obj, PycModule* mod, const char* parent_f_string_quote)
{
if (obj == NULL) {
fputs("<NULL>", pyc_output);
@@ -156,10 +156,10 @@ void print_const(PycRef<PycObject> obj, PycModule* mod)
switch (obj->type()) {
case PycObject::TYPE_STRING:
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0, false, pyc_output, parent_f_string_quote);
break;
case PycObject::TYPE_UNICODE:
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 0 : 'u');
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 0 : 'u', false, pyc_output, parent_f_string_quote);
break;
case PycObject::TYPE_STRINGREF:
case PycObject::TYPE_INTERNED:
@@ -168,9 +168,9 @@ void print_const(PycRef<PycObject> obj, PycModule* mod)
case PycObject::TYPE_SHORT_ASCII:
case PycObject::TYPE_SHORT_ASCII_INTERNED:
if (mod->majorVer() >= 3)
OutputString(obj.cast<PycString>(), 0);
OutputString(obj.cast<PycString>(), 0, false, pyc_output, parent_f_string_quote);
else
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0, false, pyc_output, parent_f_string_quote);
break;
case PycObject::TYPE_TUPLE:
case PycObject::TYPE_SMALL_TUPLE:

View File

@@ -29,6 +29,6 @@ bool IsCompareArg(int opcode);
}
void print_const(PycRef<PycObject> obj, PycModule* mod);
void print_const(PycRef<PycObject> obj, PycModule* mod, const char* parent_f_string_quote = nullptr);
void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos);
void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent);

View File

@@ -85,7 +85,7 @@ bool PycString::isEqual(PycRef<PycObject> obj) const
return isEqual(strObj->m_value);
}
void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F)
void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F, const char* parent_f_string_quote)
{
if (prefix != 0)
fputc(prefix, F);
@@ -99,23 +99,31 @@ void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F)
// Determine preferred quote style (Emulate Python's method)
bool useQuotes = false;
while (len--) {
if (*ch == '\'') {
useQuotes = true;
} else if (*ch == '"') {
useQuotes = false;
break;
if (!parent_f_string_quote) {
while (len--) {
if (*ch == '\'') {
useQuotes = true;
}
else if (*ch == '"') {
useQuotes = false;
break;
}
ch++;
}
ch++;
}
else {
useQuotes = parent_f_string_quote[0] == '"';
}
ch = str->value();
len = str->length();
// Output the string
if (triple)
fputs(useQuotes ? "\"\"\"" : "'''", F);
else
fputc(useQuotes ? '"' : '\'', F);
if (!parent_f_string_quote) {
if (triple)
fputs(useQuotes ? "\"\"\"" : "'''", F);
else
fputc(useQuotes ? '"' : '\'', F);
}
while (len--) {
if (*ch < 0x20 || *ch == 0x7F) {
if (*ch == '\r') {
@@ -144,13 +152,19 @@ void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F)
fputs("\\\"", F);
else if (*ch == '\\')
fputs("\\\\", F);
else if (parent_f_string_quote && *ch == '{')
fputs("{{", F);
else if (parent_f_string_quote && *ch == '}')
fputs("}}", F);
else
fputc(*ch, F);
}
ch++;
}
if (triple)
fputs(useQuotes ? "\"\"\"" : "'''", F);
else
fputc(useQuotes ? '"' : '\'', F);
if (!parent_f_string_quote) {
if (triple)
fputs(useQuotes ? "\"\"\"" : "'''", F);
else
fputc(useQuotes ? '"' : '\'', F);
}
}

View File

@@ -31,7 +31,7 @@ private:
std::string m_value;
};
void OutputString(PycRef<PycString> str, char prefix = 0,
bool triple = false, FILE* F = pyc_output);
void OutputString(PycRef<PycString> str, char prefix = 0, bool triple = false,
FILE* F = pyc_output, const char* parent_f_string_quote = nullptr);
#endif

Binary file not shown.

42
tests/input/f-string.py Normal file
View File

@@ -0,0 +1,42 @@
# Test input for f-string decompilation (FORMAT_VALUE / BUILD_STRING).

# Names referenced by the f-strings below.
var1 = 'x'
var2 = 'y'
x = s1 = var3 = 1.23456
a = 15
some_dict = {}
some_dict[2] = 3

# Bare f-string expression statements: empty, single field, field next to text.
f''
f'''{123}'''
f'''{123}{var1}'''
f'''{123}ok'''
f'''ok{123}'''

# f-strings as an assignment value and as call arguments.
assigned = f'''{123}'''
print(f'''{123}''')
print(f'''{123}{123}{var3}{123}''')
print(f'''{var3}''')
print(f'''{var3:4.5}''')
print(f'''f-string {123}''')

# Raw f-string and an f-string used as an operand.
print(fr'{123}:\s+')
print(f'x{12}' * 3)

# Conversions (!s, !r, !a) and format specs, alone and mixed with regular strings.
print(f'''f-string. \t\tformat value 0: {var1}, 1 (!s): {var2!s}, 2 (!r): {var2!r}, 3 (!a): {var2!a}, 4: {var3:6.3}, constant: {123}. End.''')
print('percent format %d ' % 444 + f'''f-string {123} and {var1!s}''' + f''' add another f-str {var3:2.3}''' + ' regular string ' * 2)

# Quote handling: every combination of outer quote style and inner quotes.
print(f"""'''{'single quoted string'} 'singles in f-string' {"single quote ' inside"} "doubles in f-string" {"double quoted string"} " both ' {'double quotes " inside'}'''""")
print(f'''"""{'single quoted string'} 'singles in f-string' {"single quote ' inside"} "doubles in f-string" {"double quoted string"} " both ' {'double quotes " inside'}"""''')
print(f'single quote \t\t{var1}"{var1!s}" \'{var2!a}{var3:.2f}\' """{var1!r}""" \'\'\'{var2}\'\'\'')
print(f"double quote \t\t{var1}\"{var1!s}\" '{var2!a}{var3:.2f}' \"\"\"{var1!r}\"\"\" '''{var2}'''")

# Assorted format specs applied to expressions.
print(f'{var3 * x} {var3:.2f} {var3:.5f} {x:02} {x*x:3} {x*x*x:4} {s1:>10} {a:x} {a:o} {a:e}')

# Escaped braces in the literal text versus braces inside an inner string literal.
print(f'''some {{braces}} {"inner literal: {braces} {{double braces}}"}''')

# Subscripts, calls and a lambda inside replacement fields.
print(f'''f-string dict {some_dict[2]} and {{function call in expression}}: {max([1,20,3])}''')
print(f'{(lambda x: x*2)(3)}')

# Adjacent f-string literals spread over several lines (implicit concatenation).
msg = (
f'a {var1}'
f'cool'
f'multiline {var2}\n'
f'f-string {var3}'
)
# Commented out because LOAD_/CALL_METHOD not supported. https://github.com/zrax/pycdc/issues/163
# The f-string will decompile correctly, however.
# import datetime
# datetime.date(2015,9,29)
#print(f'{now:%Y-%m-%d %H:%M}')

View File

@@ -0,0 +1,32 @@
var1 = 'x' <EOL>
var2 = 'y' <EOL>
x = 1.23456 <EOL>
s1 = 1.23456 <EOL>
var3 = 1.23456 <EOL>
a = 15 <EOL>
some_dict = { } <EOL>
some_dict [ 2 ] = 3 <EOL>
f'' <EOL>
f'{123}' <EOL>
f'{123}{var1}' <EOL>
f'{123}ok' <EOL>
f'ok{123}' <EOL>
assigned = f'{123}' <EOL>
print ( f'{123}' ) <EOL>
print ( f'{123}{123}{var3}{123}' ) <EOL>
print ( f'{var3}' ) <EOL>
print ( f'{var3:4.5}' ) <EOL>
print ( f'f-string {123}' ) <EOL>
print ( f'{123}:\\s+' ) <EOL>
print ( f'x{12}' * 3 ) <EOL>
print ( f'f-string. \t\tformat value 0: {var1}, 1 (!s): {var2!s}, 2 (!r): {var2!r}, 3 (!a): {var2!a}, 4: {var3:6.3}, constant: {123}. End.' ) <EOL>
print ( 'percent format %d ' % 444 + f'f-string {123} and {var1!s}' + f' add another f-str {var3:2.3}' + ' regular string regular string ' ) <EOL>
print ( f'' ) <EOL>
print ( f'"""{\'single quoted string\'} \'singles in f-string\' {"single quote \' inside"} "doubles in f-string" {\'double quoted string\'} " both \' {\'double quotes " inside\'}"""' ) <EOL>
print ( f'' ) <EOL>
print ( f'' ) <EOL>
print ( f'{var3 * x} {var3:.2f} {var3:.5f} {x:02} {x * x:3} {x * x * x:4} {s1:>10} {a:x} {a:o} {a:e}' ) <EOL>
print ( f'some {{braces}} {\'inner literal: {braces} {{double braces}}\'}' ) <EOL>
print ( f'f-string dict {some_dict[2]} and {{function call in expression}}: {max([\n 1,\n 20,\n 3])}' ) <EOL>
print ( f'{(lambda x: x * 2)(3)}' ) <EOL>
msg = f'a {var1}coolmultiline {var2}\nf-string {var3}' <EOL>