Issue-165: Added support for f-strings (literal string interpolation, see PEP 498: https://www.python.org/dev/peps/pep-0498/).
Opcodes handled: FORMAT_VALUE and BUILD_STRING. Added the AST node classes ASTFormattedValue and ASTJoinedStr.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -4,3 +4,4 @@
|
||||
*.gcda
|
||||
*.kdev4
|
||||
/.kdev4
|
||||
__pycache__
|
||||
|
@@ -79,3 +79,11 @@ const char* ASTBlock::type_str() const
|
||||
};
|
||||
return s_type_strings[blktype()];
|
||||
}
|
||||
/* ASTFormattedValue */
|
||||
// This must be a triple quote (''' or """) so that string literals inside the interpolated expressions may use either single-character quote style without terminating the f-string.
|
||||
// E.g. f'''{"interpolated "123' literal"}''' -> valid.
|
||||
// E.g. f"""{"interpolated "123' literal"}""" -> valid.
|
||||
// E.g. f'{"interpolated "123' literal"}' -> invalid, unescaped quotes in literal.
|
||||
// E.g. f'{"interpolated \"123\' literal"}' -> invalid, f-string expression does not allow backslash.
|
||||
// NOTE: Nested f-strings not supported.
|
||||
// Delimiter written before and after every decompiled f-string by print_src
// (NODE_FORMATTEDVALUE and NODE_JOINEDSTR cases).
const char* ASTFormattedValue::F_STRING_QUOTE = "'''";
|
||||
|
44
ASTNode.h
44
ASTNode.h
@@ -15,6 +15,7 @@ public:
|
||||
NODE_TUPLE, NODE_LIST, NODE_MAP, NODE_SUBSCR, NODE_PRINT,
|
||||
NODE_CONVERT, NODE_KEYWORD, NODE_RAISE, NODE_EXEC, NODE_BLOCK,
|
||||
NODE_COMPREHENSION, NODE_LOADBUILDCLASS, NODE_AWAITABLE,
|
||||
NODE_FORMATTEDVALUE, NODE_JOINEDSTR,
|
||||
|
||||
// Empty node types
|
||||
NODE_LOCALS,
|
||||
@@ -611,4 +612,47 @@ private:
|
||||
PycRef<ASTNode> m_expr;
|
||||
};
|
||||
|
||||
class ASTFormattedValue : public ASTNode {
|
||||
public:
|
||||
static const char* F_STRING_QUOTE;
|
||||
enum ConversionFlag {
|
||||
NONE=0,
|
||||
STR=1,
|
||||
REPR=2,
|
||||
ASCII=3,
|
||||
FMTSPEC=4
|
||||
};
|
||||
|
||||
ASTFormattedValue(PycRef<ASTNode> val, ConversionFlag conversion, PycRef<ASTNode> format_spec)
|
||||
: ASTNode(NODE_FORMATTEDVALUE),
|
||||
m_val(std::move(val)),
|
||||
m_conversion(conversion),
|
||||
m_format_spec(std::move(format_spec))
|
||||
{}
|
||||
|
||||
PycRef<ASTNode> val() const { return m_val; }
|
||||
ConversionFlag conversion() const { return m_conversion; }
|
||||
PycRef<ASTNode> format_spec() const { return m_format_spec; }
|
||||
|
||||
private:
|
||||
PycRef<ASTNode> m_val;
|
||||
ConversionFlag m_conversion;
|
||||
PycRef<ASTNode> m_format_spec;
|
||||
};
|
||||
|
||||
// Same as ASTList
|
||||
class ASTJoinedStr : public ASTNode {
|
||||
public:
|
||||
typedef std::list<PycRef<ASTNode>> value_t;
|
||||
|
||||
ASTJoinedStr(value_t values)
|
||||
: ASTNode(NODE_JOINEDSTR), m_values(std::move(values)) { }
|
||||
|
||||
const value_t& values() const { return m_values; }
|
||||
|
||||
private:
|
||||
value_t m_values;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
92
ASTree.cpp
92
ASTree.cpp
@@ -366,6 +366,17 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Pyc::BUILD_STRING_A:
|
||||
{
|
||||
// Nearly identical logic to BUILD_LIST
|
||||
ASTList::value_t values;
|
||||
for (int i = 0; i < operand; i++) {
|
||||
values.push_front(stack.top());
|
||||
stack.pop();
|
||||
}
|
||||
stack.push(new ASTJoinedStr(values));
|
||||
}
|
||||
break;
|
||||
case Pyc::BUILD_TUPLE_A:
|
||||
{
|
||||
ASTTuple::value_t values;
|
||||
@@ -782,6 +793,35 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
|
||||
stack.push(NULL); // We can totally hack this >_>
|
||||
}
|
||||
break;
|
||||
case Pyc::FORMAT_VALUE_A:
|
||||
{
|
||||
auto conversion_flag = static_cast<ASTFormattedValue::ConversionFlag>(operand);
|
||||
switch (conversion_flag)
|
||||
{
|
||||
case ASTFormattedValue::ConversionFlag::NONE:
|
||||
case ASTFormattedValue::ConversionFlag::STR:
|
||||
case ASTFormattedValue::ConversionFlag::REPR:
|
||||
case ASTFormattedValue::ConversionFlag::ASCII:
|
||||
{
|
||||
auto val = stack.top();
|
||||
stack.pop();
|
||||
stack.push(new ASTFormattedValue(val, conversion_flag, nullptr));
|
||||
}
|
||||
break;
|
||||
case ASTFormattedValue::ConversionFlag::FMTSPEC:
|
||||
{
|
||||
auto format_spec = stack.top();
|
||||
stack.pop();
|
||||
auto val = stack.top();
|
||||
stack.pop();
|
||||
stack.push(new ASTFormattedValue(val, conversion_flag, format_spec));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unsupported FORMAT_VALUE_A conversion flag: %d\n", operand);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Pyc::GET_AWAITABLE:
|
||||
{
|
||||
PycRef<ASTNode> object = stack.top();
|
||||
@@ -2277,6 +2317,33 @@ static void print_block(PycRef<ASTBlock> blk, PycModule* mod) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Write one f-string replacement field to pyc_output in the form
 * "{" value ["!s" | "!r" | "!a"] [":" format_spec] "}".
 *
 * The flags stored in the node follow the FORMAT_VALUE operand layout: the
 * two low bits select the conversion and the FMTSPEC bit marks an attached
 * format spec. The two parts are decoded independently because a field may
 * carry both (e.g. "{x!r:>10}").
 *
 * @param formatted_value  the replacement field to print
 * @param mod              module being decompiled, forwarded to print_src
 */
void print_formatted_value(PycRef<ASTFormattedValue> formatted_value, PycModule* mod)
{
    const int flags = static_cast<int>(formatted_value->conversion());
    const int conversion = flags & 0x03;
    const bool has_format_spec = (flags & ASTFormattedValue::FMTSPEC) != 0;

    fputs("{", pyc_output);
    print_src(formatted_value->val(), mod);

    if ((flags & ~0x07) != 0) {
        fprintf(stderr, "Unsupported NODE_FORMATTEDVALUE conversion flag: %d\n", flags);
    }

    switch (conversion) {
    case ASTFormattedValue::STR:
        fputs("!s", pyc_output);
        break;
    case ASTFormattedValue::REPR:
        fputs("!r", pyc_output);
        break;
    case ASTFormattedValue::ASCII:
        fputs("!a", pyc_output);
        break;
    default:
        // ASTFormattedValue::NONE: the value is formatted as-is.
        break;
    }

    if (has_format_spec) {
        PycRef<ASTNode> spec = formatted_value->format_spec();
        // Only a constant string spec can be printed verbatim; check the
        // node type before casting so that any other node (or a missing
        // spec) is reported instead of being misinterpreted.
        if (spec.type() == ASTNode::NODE_OBJECT) {
            fprintf(pyc_output, ":%s", spec.cast<ASTObject>()->object().cast<PycString>()->value());
        } else {
            fprintf(stderr, "Unsupported format spec node type %d in NODE_FORMATTEDVALUE\n", spec.type());
        }
    }

    fputs("}", pyc_output);
}
|
||||
|
||||
void print_src(PycRef<ASTNode> node, PycModule* mod)
|
||||
{
|
||||
if (node == NULL) {
|
||||
@@ -2367,6 +2434,31 @@ void print_src(PycRef<ASTNode> node, PycModule* mod)
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ASTNode::NODE_FORMATTEDVALUE:
|
||||
fprintf(pyc_output, "f%s", ASTFormattedValue::F_STRING_QUOTE);
|
||||
print_formatted_value(node.cast<ASTFormattedValue>(), mod);
|
||||
fputs(ASTFormattedValue::F_STRING_QUOTE, pyc_output);
|
||||
break;
|
||||
case ASTNode::NODE_JOINEDSTR:
|
||||
fprintf(pyc_output, "f%s", ASTFormattedValue::F_STRING_QUOTE);
|
||||
for (const auto& val : node.cast<ASTJoinedStr>()->values())
|
||||
{
|
||||
switch (val.type())
|
||||
{
|
||||
case ASTNode::NODE_FORMATTEDVALUE:
|
||||
print_formatted_value(val.cast<ASTFormattedValue>(), mod);
|
||||
break;
|
||||
case ASTNode::NODE_OBJECT:
|
||||
// When printing a piece of the f-string, keep the quote style consistent.
|
||||
// This avoids problems when ''' or """ is part of the string.
|
||||
print_const(val.cast<ASTObject>()->object(), mod, ASTFormattedValue::F_STRING_QUOTE);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", val.type());
|
||||
}
|
||||
}
|
||||
fputs(ASTFormattedValue::F_STRING_QUOTE, pyc_output);
|
||||
break;
|
||||
case ASTNode::NODE_KEYWORD:
|
||||
fprintf(pyc_output, "%s", node.cast<ASTKeyword>()->word_str());
|
||||
break;
|
||||
|
10
bytecode.cpp
10
bytecode.cpp
@@ -147,7 +147,7 @@ bool Pyc::IsCompareArg(int opcode)
|
||||
return (opcode == Pyc::COMPARE_OP_A);
|
||||
}
|
||||
|
||||
void print_const(PycRef<PycObject> obj, PycModule* mod)
|
||||
void print_const(PycRef<PycObject> obj, PycModule* mod, const char* parent_f_string_quote)
|
||||
{
|
||||
if (obj == NULL) {
|
||||
fputs("<NULL>", pyc_output);
|
||||
@@ -156,10 +156,10 @@ void print_const(PycRef<PycObject> obj, PycModule* mod)
|
||||
|
||||
switch (obj->type()) {
|
||||
case PycObject::TYPE_STRING:
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0, false, pyc_output, parent_f_string_quote);
|
||||
break;
|
||||
case PycObject::TYPE_UNICODE:
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 0 : 'u');
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 0 : 'u', false, pyc_output, parent_f_string_quote);
|
||||
break;
|
||||
case PycObject::TYPE_STRINGREF:
|
||||
case PycObject::TYPE_INTERNED:
|
||||
@@ -168,9 +168,9 @@ void print_const(PycRef<PycObject> obj, PycModule* mod)
|
||||
case PycObject::TYPE_SHORT_ASCII:
|
||||
case PycObject::TYPE_SHORT_ASCII_INTERNED:
|
||||
if (mod->majorVer() >= 3)
|
||||
OutputString(obj.cast<PycString>(), 0);
|
||||
OutputString(obj.cast<PycString>(), 0, false, pyc_output, parent_f_string_quote);
|
||||
else
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0);
|
||||
OutputString(obj.cast<PycString>(), mod->strIsUnicode() ? 'b' : 0, false, pyc_output, parent_f_string_quote);
|
||||
break;
|
||||
case PycObject::TYPE_TUPLE:
|
||||
case PycObject::TYPE_SMALL_TUPLE:
|
||||
|
@@ -29,6 +29,6 @@ bool IsCompareArg(int opcode);
|
||||
|
||||
}
|
||||
|
||||
void print_const(PycRef<PycObject> obj, PycModule* mod);
|
||||
void print_const(PycRef<PycObject> obj, PycModule* mod, const char* parent_f_string_quote = nullptr);
|
||||
void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos);
|
||||
void bc_disasm(PycRef<PycCode> code, PycModule* mod, int indent);
|
||||
|
@@ -85,7 +85,7 @@ bool PycString::isEqual(PycRef<PycObject> obj) const
|
||||
return isEqual(strObj->m_value);
|
||||
}
|
||||
|
||||
void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F)
|
||||
void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F, const char* parent_f_string_quote)
|
||||
{
|
||||
if (prefix != 0)
|
||||
fputc(prefix, F);
|
||||
@@ -99,23 +99,31 @@ void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F)
|
||||
|
||||
// Determine preferred quote style (Emulate Python's method)
|
||||
bool useQuotes = false;
|
||||
while (len--) {
|
||||
if (*ch == '\'') {
|
||||
useQuotes = true;
|
||||
} else if (*ch == '"') {
|
||||
useQuotes = false;
|
||||
break;
|
||||
if (!parent_f_string_quote) {
|
||||
while (len--) {
|
||||
if (*ch == '\'') {
|
||||
useQuotes = true;
|
||||
}
|
||||
else if (*ch == '"') {
|
||||
useQuotes = false;
|
||||
break;
|
||||
}
|
||||
ch++;
|
||||
}
|
||||
ch++;
|
||||
}
|
||||
else {
|
||||
useQuotes = parent_f_string_quote[0] == '"';
|
||||
}
|
||||
ch = str->value();
|
||||
len = str->length();
|
||||
|
||||
// Output the string
|
||||
if (triple)
|
||||
fputs(useQuotes ? "\"\"\"" : "'''", F);
|
||||
else
|
||||
fputc(useQuotes ? '"' : '\'', F);
|
||||
if (!parent_f_string_quote) {
|
||||
if (triple)
|
||||
fputs(useQuotes ? "\"\"\"" : "'''", F);
|
||||
else
|
||||
fputc(useQuotes ? '"' : '\'', F);
|
||||
}
|
||||
while (len--) {
|
||||
if (*ch < 0x20 || *ch == 0x7F) {
|
||||
if (*ch == '\r') {
|
||||
@@ -144,13 +152,19 @@ void OutputString(PycRef<PycString> str, char prefix, bool triple, FILE* F)
|
||||
fputs("\\\"", F);
|
||||
else if (*ch == '\\')
|
||||
fputs("\\\\", F);
|
||||
else if (parent_f_string_quote && *ch == '{')
|
||||
fputs("{{", F);
|
||||
else if (parent_f_string_quote && *ch == '}')
|
||||
fputs("}}", F);
|
||||
else
|
||||
fputc(*ch, F);
|
||||
}
|
||||
ch++;
|
||||
}
|
||||
if (triple)
|
||||
fputs(useQuotes ? "\"\"\"" : "'''", F);
|
||||
else
|
||||
fputc(useQuotes ? '"' : '\'', F);
|
||||
if (!parent_f_string_quote) {
|
||||
if (triple)
|
||||
fputs(useQuotes ? "\"\"\"" : "'''", F);
|
||||
else
|
||||
fputc(useQuotes ? '"' : '\'', F);
|
||||
}
|
||||
}
|
||||
|
@@ -31,7 +31,7 @@ private:
|
||||
std::string m_value;
|
||||
};
|
||||
|
||||
void OutputString(PycRef<PycString> str, char prefix = 0,
|
||||
bool triple = false, FILE* F = pyc_output);
|
||||
void OutputString(PycRef<PycString> str, char prefix = 0, bool triple = false,
|
||||
FILE* F = pyc_output, const char* parent_f_string_quote = nullptr);
|
||||
|
||||
#endif
|
||||
|
BIN
tests/compiled/f-string.3.7.pyc
Normal file
BIN
tests/compiled/f-string.3.7.pyc
Normal file
Binary file not shown.
42
tests/input/f-string.py
Normal file
42
tests/input/f-string.py
Normal file
@@ -0,0 +1,42 @@
|
||||
var1 = 'x'
|
||||
var2 = 'y'
|
||||
x = s1 = var3 = 1.23456
|
||||
a = 15
|
||||
some_dict = {}
|
||||
some_dict[2] = 3
|
||||
|
||||
f''
|
||||
f'''{123}'''
|
||||
f'''{123}{var1}'''
|
||||
f'''{123}ok'''
|
||||
f'''ok{123}'''
|
||||
assigned = f'''{123}'''
|
||||
print(f'''{123}''')
|
||||
print(f'''{123}{123}{var3}{123}''')
|
||||
print(f'''{var3}''')
|
||||
print(f'''{var3:4.5}''')
|
||||
print(f'''f-string {123}''')
|
||||
print(fr'{123}:\s+')
|
||||
print(f'x{12}' * 3)
|
||||
print(f'''f-string. \t\tformat value 0: {var1}, 1 (!s): {var2!s}, 2 (!r): {var2!r}, 3 (!a): {var2!a}, 4: {var3:6.3}, constant: {123}. End.''')
|
||||
print('percent format %d ' % 444 + f'''f-string {123} and {var1!s}''' + f''' add another f-str {var3:2.3}''' + ' regular string ' * 2)
|
||||
print(f"""'''{'single quoted string'} 'singles in f-string' {"single quote ' inside"} "doubles in f-string" {"double quoted string"} " both ' {'double quotes " inside'}'''""")
|
||||
print(f'''"""{'single quoted string'} 'singles in f-string' {"single quote ' inside"} "doubles in f-string" {"double quoted string"} " both ' {'double quotes " inside'}"""''')
|
||||
print(f'single quote \t\t{var1}"{var1!s}" \'{var2!a}{var3:.2f}\' """{var1!r}""" \'\'\'{var2}\'\'\'')
|
||||
print(f"double quote \t\t{var1}\"{var1!s}\" '{var2!a}{var3:.2f}' \"\"\"{var1!r}\"\"\" '''{var2}'''")
|
||||
print(f'{var3 * x} {var3:.2f} {var3:.5f} {x:02} {x*x:3} {x*x*x:4} {s1:>10} {a:x} {a:o} {a:e}')
|
||||
print(f'''some {{braces}} {"inner literal: {braces} {{double braces}}"}''')
|
||||
print(f'''f-string dict {some_dict[2]} and {{function call in expression}}: {max([1,20,3])}''')
|
||||
print(f'{(lambda x: x*2)(3)}')
|
||||
msg = (
|
||||
f'a {var1}'
|
||||
f'cool'
|
||||
f'multiline {var2}\n'
|
||||
f'f-string {var3}'
|
||||
)
|
||||
|
||||
# Commented out because LOAD_/CALL_METHOD not supported. https://github.com/zrax/pycdc/issues/163
|
||||
# The f-string will decompile correctly, however.
|
||||
# import datetime
|
||||
# datetime.date(2015,9,29)
|
||||
#print(f'{now:%Y-%m-%d %H:%M}')
|
32
tests/tokenized/f-string.txt
Normal file
32
tests/tokenized/f-string.txt
Normal file
@@ -0,0 +1,32 @@
|
||||
var1 = 'x' <EOL>
|
||||
var2 = 'y' <EOL>
|
||||
x = 1.23456 <EOL>
|
||||
s1 = 1.23456 <EOL>
|
||||
var3 = 1.23456 <EOL>
|
||||
a = 15 <EOL>
|
||||
some_dict = { } <EOL>
|
||||
some_dict [ 2 ] = 3 <EOL>
|
||||
f'' <EOL>
|
||||
f'{123}' <EOL>
|
||||
f'{123}{var1}' <EOL>
|
||||
f'{123}ok' <EOL>
|
||||
f'ok{123}' <EOL>
|
||||
assigned = f'{123}' <EOL>
|
||||
print ( f'{123}' ) <EOL>
|
||||
print ( f'{123}{123}{var3}{123}' ) <EOL>
|
||||
print ( f'{var3}' ) <EOL>
|
||||
print ( f'{var3:4.5}' ) <EOL>
|
||||
print ( f'f-string {123}' ) <EOL>
|
||||
print ( f'{123}:\\s+' ) <EOL>
|
||||
print ( f'x{12}' * 3 ) <EOL>
|
||||
print ( f'f-string. \t\tformat value 0: {var1}, 1 (!s): {var2!s}, 2 (!r): {var2!r}, 3 (!a): {var2!a}, 4: {var3:6.3}, constant: {123}. End.' ) <EOL>
|
||||
print ( 'percent format %d ' % 444 + f'f-string {123} and {var1!s}' + f' add another f-str {var3:2.3}' + ' regular string regular string ' ) <EOL>
|
||||
print ( f'' ) <EOL>
|
||||
print ( f'"""{\'single quoted string\'} \'singles in f-string\' {"single quote \' inside"} "doubles in f-string" {\'double quoted string\'} " both \' {\'double quotes " inside\'}"""' ) <EOL>
|
||||
print ( f'' ) <EOL>
|
||||
print ( f'' ) <EOL>
|
||||
print ( f'{var3 * x} {var3:.2f} {var3:.5f} {x:02} {x * x:3} {x * x * x:4} {s1:>10} {a:x} {a:o} {a:e}' ) <EOL>
|
||||
print ( f'some {{braces}} {\'inner literal: {braces} {{double braces}}\'}' ) <EOL>
|
||||
print ( f'f-string dict {some_dict[2]} and {{function call in expression}}: {max([\n 1,\n 20,\n 3])}' ) <EOL>
|
||||
print ( f'{(lambda x: x * 2)(3)}' ) <EOL>
|
||||
msg = f'a {var1}coolmultiline {var2}\nf-string {var3}' <EOL>
|
Reference in New Issue
Block a user