Add support for async for. Issue #124

Ignore the 'boilerplate' opcodes that set up the async iteration logic internally (https://www.python.org/dev/peps/pep-0492/#asynchronous-iterators-and-async-for). We don't need any of them, because GET_AITER alone is enough to tell us that we are in an `async for`.
This commit is contained in:
Aralox
2020-10-20 20:39:23 +11:00
parent 8fdfe170fc
commit 8933c9a4a9
7 changed files with 248 additions and 13 deletions

View File

@@ -1,4 +1,5 @@
#include "ASTNode.h"
#include <bytecode.h>
/* ASTNodeList */
void ASTNodeList::removeLast()
@@ -75,7 +76,45 @@ const char* ASTBlock::type_str() const
{
static const char* s_type_strings[] = {
"", "if", "else", "elif", "try", "CONTAINER", "except",
"finally", "while", "for", "with",
"finally", "while", "for", "with", "async for"
};
return s_type_strings[blktype()];
}
/* ASTIterBlock */
// Bookkeeping constants used to skip the opcodes that surround an 'async for'
// loop. The *_READLOOPINDEX, *_READLOOPCONTENTS and *_FIRSTJUMP values are
// indices into ASYNCFOR_BOILERPLATE (defined below), so they must be kept in
// sync with the order of that table.

// Index of the POP_BLOCK entry. While the skipper is waiting at this index,
// opcodes that are not POP_BLOCK are processed normally so that the loop index
// variable can be read.
const int ASTIterBlock::ASYNCFOR_BOILER_READLOOPINDEX = 4;
// Index of the first 'trailer' entry. Entries below this index form the
// preamble that precedes the loop contents.
const int ASTIterBlock::ASYNCFOR_BOILER_READLOOPCONTENTS = 11;
// Subtracted from the loop block's end position; trailer entries are only
// matched once the current position is past the result.
// NOTE(review): presumably the encoded size of the trailer for the targeted
// bytecode version - confirm.
const int ASTIterBlock::ASYNCFOR_BOILER_OFFSETFROMEND = 12;
// Index of the JUMP_FORWARD_A entry, the only entry for which an alternative
// opcode (ASYNCFOR_BOILER_ALTJUMPOP) is also accepted.
const int ASTIterBlock::ASYNCFOR_BOILER_FIRSTJUMP = 5;
// Opcode accepted in place of the entry at ASYNCFOR_BOILER_FIRSTJUMP.
const Pyc::Opcode ASTIterBlock::ASYNCFOR_BOILER_ALTJUMPOP = Pyc::JUMP_ABSOLUTE_A;
// Expected sequence of boilerplate opcodes for an 'async for', in order.
// The bracketed numbers are the entries' indices in this table.
const std::vector<Pyc::Opcode> ASTIterBlock::ASYNCFOR_BOILERPLATE = {
// Ignore all these after GET_AITER.
Pyc::SETUP_EXCEPT_A, // [0]
Pyc::GET_ANEXT, // [1]
Pyc::LOAD_CONST_A, // [2] None
Pyc::YIELD_FROM, // [3]
// Process ops from here to load index variable.
// Continue ignoring all these
Pyc::POP_BLOCK, // [4] ASYNCFOR_BOILER_READLOOPINDEX
Pyc::JUMP_FORWARD_A, // [5] ASYNCFOR_BOILER_FIRSTJUMP; or JUMP_ABSOLUTE if pass
Pyc::DUP_TOP, // [6]
Pyc::LOAD_GLOBAL_A, // [7] StopAsyncIteration
Pyc::COMPARE_OP_A, // [8] (<EXCEPTION MATCH>)
Pyc::POP_JUMP_IF_TRUE_A, // [9]
Pyc::END_FINALLY, // [10]
// Actual code inside the loop begins here. 'Preamble' before this, 'Trailer' after.
// After reading a JUMP at the address we are expecting, ignore the following:
Pyc::POP_TOP, // [11] ASYNCFOR_BOILER_READLOOPCONTENTS
Pyc::POP_TOP, // [12]
Pyc::POP_TOP, // [13]
Pyc::POP_EXCEPT, // [14]
Pyc::POP_TOP // [15]
// Then a POP_BLOCK as per normal loop
};

View File

@@ -465,7 +465,7 @@ public:
enum BlkType {
BLK_MAIN, BLK_IF, BLK_ELSE, BLK_ELIF, BLK_TRY,
BLK_CONTAINER, BLK_EXCEPT, BLK_FINALLY,
BLK_WHILE, BLK_FOR, BLK_WITH
BLK_WHILE, BLK_FOR, BLK_WITH, BLK_ASYNCFOR
};
ASTBlock(BlkType blktype, int end = 0, int inited = 0)
@@ -514,9 +514,17 @@ private:
bool m_negative;
};
namespace Pyc { enum Opcode : int; }
class ASTIterBlock : public ASTBlock {
public:
static const std::vector<Pyc::Opcode> ASYNCFOR_BOILERPLATE;
static const int ASYNCFOR_BOILER_READLOOPINDEX;
static const int ASYNCFOR_BOILER_READLOOPCONTENTS;
static const int ASYNCFOR_BOILER_OFFSETFROMEND;
static const int ASYNCFOR_BOILER_FIRSTJUMP;
static const Pyc::Opcode ASYNCFOR_BOILER_ALTJUMPOP;
ASTIterBlock(ASTBlock::BlkType blktype, int end, PycRef<ASTNode> iter)
: ASTBlock(blktype, end), m_iter(std::move(iter)), m_idx(), m_comp() { }

View File

@@ -18,6 +18,11 @@ static bool printDocstringAndGlobals = false;
/* Use this to keep track of whether we need to print a class or module docstring */
static bool printClassDocstring = true;
/* Used to track what boilerplate ops to ignore when inside an 'async for'. */
static int currentAsyncForOpIndex = -1;
static int currentAsyncForEndPos = 0;
PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
{
PycBuffer source(code->code()->value(), code->code()->length());
@@ -54,6 +59,51 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
curpos = pos;
bc_next(source, mod, opcode, operand, pos);
// While an 'async for' is being tracked (index > -1), swallow the opcodes
// listed in ASTIterBlock::ASYNCFOR_BOILERPLATE instead of decompiling them.
// currentAsyncForOpIndex is the index of the next expected boilerplate entry.
// A 'continue' below skips the opcode entirely; every other path falls through
// so the opcode is handled by the normal processing that follows.
// NOTE(review): 'pos' has already been passed to bc_next above, so it
// presumably refers to the position after the current opcode - confirm.
if (currentAsyncForOpIndex > -1) {
if (currentAsyncForOpIndex < ASTIterBlock::ASYNCFOR_BOILER_READLOOPCONTENTS)
{
// Preamble phase: entries before the loop contents.
// Ignore boilerplate ops - we don't need this information to reconstruct the 'async for'.
const auto currentExpectedBoilerplateOp = ASTIterBlock::ASYNCFOR_BOILERPLATE[currentAsyncForOpIndex];
// Accept the expected opcode, or the alternative jump opcode when we are
// at the first-jump entry.
if (opcode == currentExpectedBoilerplateOp
|| (currentAsyncForOpIndex == ASTIterBlock::ASYNCFOR_BOILER_FIRSTJUMP
&& opcode == ASTIterBlock::ASYNCFOR_BOILER_ALTJUMPOP))
{
currentAsyncForOpIndex++;
continue;
}
else if (currentAsyncForOpIndex == ASTIterBlock::ASYNCFOR_BOILER_READLOOPINDEX) {
// Resume processing ops to read in the loop variable.
// The index is deliberately left unchanged, so we stay in this state
// until the expected opcode for this entry shows up.
}
else {
// Mismatch: report it and stop tracking. Only the index is reset here;
// currentAsyncForEndPos keeps its value.
fprintf(stderr, "Unexpected opcode %s when decompiling 'async for' preamble. (pos, ind): (%d, %d)\n",
Pyc::OpcodeName(opcode & 0xFF), pos, currentAsyncForOpIndex);
currentAsyncForOpIndex = -1;
}
}
else if (currentAsyncForOpIndex < static_cast<int>(ASTIterBlock::ASYNCFOR_BOILERPLATE.size()))
{
// Trailer phase: opcodes at or before lastBoilerPos are processed normally;
// past that position the remaining boilerplate entries are matched.
auto lastBoilerPos = currentAsyncForEndPos - ASTIterBlock::ASYNCFOR_BOILER_OFFSETFROMEND;
if (pos > lastBoilerPos)
{
// We have just finished processing the JUMP_ABSOLUTE at the end of the loop - resume ignoring boilerplate.
if (opcode == ASTIterBlock::ASYNCFOR_BOILERPLATE[currentAsyncForOpIndex]) {
currentAsyncForOpIndex++;
continue;
}
else {
// Mismatch: report it and stop tracking. Only the index is reset here;
// currentAsyncForEndPos keeps its value.
fprintf(stderr, "Unexpected opcode %s when decompiling 'async for' trailer. (pos, end, ind): (%d, %d, %d)\n",
Pyc::OpcodeName(opcode & 0xFF), pos, lastBoilerPos, currentAsyncForOpIndex);
currentAsyncForOpIndex = -1;
}
}
}
else {
// Every boilerplate entry has been consumed: clear the tracking state.
// The current opcode is not skipped.
currentAsyncForOpIndex = -1;
currentAsyncForEndPos = 0;
}
}
if (need_try && opcode != Pyc::SETUP_EXCEPT_A) {
need_try = false;
@@ -782,6 +832,29 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(NULL); // We can totally hack this >_>
}
break;
// GET_AITER marks the start of an 'async for': replace the enclosing loop
// block with a BLK_ASYNCFOR iteration block and start skipping boilerplate.
case Pyc::GET_AITER:
{
// Logic very similar to FOR_ITER_A
PycRef<ASTNode> iter = stack.top(); // Iterable
stack.pop();

// The block on top is expected to be a BLK_WHILE (presumably the one
// created for SETUP_LOOP, as the error message below implies); it is
// replaced by the async-for block.
PycRef<ASTBlock> top = blocks.top();
if (top->blktype() == ASTBlock::BLK_WHILE) {
blocks.pop();
}
else {
// Only reported: the async-for block is still pushed below, on top of
// the unexpected block.
fprintf(stderr, "Unsupported use of GET_AITER outside of SETUP_LOOP\n");
}
// Remember where the loop ends; used to locate the trailer boilerplate.
currentAsyncForEndPos = top->end();
// The new block takes over the end position of the block inspected above.
PycRef<ASTIterBlock> forblk = new ASTIterBlock(ASTBlock::BLK_ASYNCFOR, top->end(), iter);
blocks.push(forblk.cast<ASTBlock>());
curblock = blocks.top();
// NOTE(review): presumably a placeholder stack entry, mirroring the
// FOR_ITER_A handling referenced above - confirm.
stack.push(NULL);
// Arm the boilerplate skipper at the first ASYNCFOR_BOILERPLATE entry.
currentAsyncForOpIndex = 0;
}
break;
case Pyc::GET_AWAITABLE:
{
PycRef<ASTNode> object = stack.top();
@@ -1414,7 +1487,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
curblock->append(tmp.cast<ASTNode>());
}
if (tmp->blktype() == ASTBlock::BLK_FOR && tmp->end() >= pos) {
if ((tmp->blktype() == ASTBlock::BLK_FOR || tmp->blktype() == ASTBlock::BLK_ASYNCFOR) && tmp->end() >= pos) {
stack_hist.push(stack);
PycRef<ASTBlock> blkelse = new ASTBlock(ASTBlock::BLK_ELSE, tmp->end());
@@ -1423,8 +1496,9 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
if (curblock->blktype() == ASTBlock::BLK_TRY
&& tmp->blktype() != ASTBlock::BLK_FOR
&& tmp->blktype() != ASTBlock::BLK_WHILE) {
&& tmp->blktype() != ASTBlock::BLK_FOR
&& tmp->blktype() != ASTBlock::BLK_ASYNCFOR
&& tmp->blktype() != ASTBlock::BLK_WHILE) {
stack = stack_hist.top();
stack_hist.pop();
@@ -1460,7 +1534,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
}
if (curblock->blktype() == ASTBlock::BLK_FOR
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
&& curblock->end() == pos) {
blocks.pop();
blocks.top()->append(curblock.cast<ASTNode>());
@@ -1834,7 +1908,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> seq = stack.top();
stack.pop();
if (curblock->blktype() == ASTBlock::BLK_FOR
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
&& !curblock->inited()) {
PycRef<ASTTuple> tuple = tup.cast<ASTTuple>();
if (tuple != NULL)
@@ -1860,7 +1934,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
break;
}
if (curblock->blktype() == ASTBlock::BLK_FOR
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
&& !curblock->inited()) {
curblock.cast<ASTIterBlock>()->setIndex(name);
} else if (curblock->blktype() == ASTBlock::BLK_WITH
@@ -1889,7 +1963,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> seq = stack.top();
stack.pop();
if (curblock->blktype() == ASTBlock::BLK_FOR
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
&& !curblock->inited()) {
PycRef<ASTTuple> tuple = tup.cast<ASTTuple>();
if (tuple != NULL)
@@ -1925,7 +1999,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> seq = stack.top();
stack.pop();
if (curblock->blktype() == ASTBlock::BLK_FOR
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
&& !curblock->inited()) {
PycRef<ASTTuple> tuple = tup.cast<ASTTuple>();
if (tuple != NULL)
@@ -1953,7 +2027,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> name = new ASTName(varname);
if (curblock->blktype() == ASTBlock::BLK_FOR
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
&& !curblock->inited()) {
curblock.cast<ASTIterBlock>()->setIndex(name);
} else if (stack.top().type() == ASTNode::NODE_IMPORT) {
@@ -2469,7 +2543,7 @@ void print_src(PycRef<ASTNode> node, PycModule* mod)
fputs(" ", pyc_output);
print_src(blk.cast<ASTCondBlock>()->cond(), mod);
} else if (blk->blktype() == ASTBlock::BLK_FOR) {
} else if (blk->blktype() == ASTBlock::BLK_FOR || blk->blktype() == ASTBlock::BLK_ASYNCFOR) {
fputs(" ", pyc_output);
print_src(blk.cast<ASTIterBlock>()->index(), mod);
fputs(" in ", pyc_output);

View File

@@ -4,7 +4,7 @@
namespace Pyc {
enum Opcode {
enum Opcode : int {
#define OPCODE(x) x,
#define OPCODE_A_FIRST(x) PYC_HAVE_ARG, x##_A = PYC_HAVE_ARG,
#define OPCODE_A(x) x##_A,

Binary file not shown.

44
tests/input/async_for.py Normal file
View File

@@ -0,0 +1,44 @@
async def a(b, c):
async for b in c:
pass
async def a(b, c):
async for b in c:
continue
async def a(b, c):
async for b in c:
break
async def time_for_some_fun():
async for (x, y) in myfunc(c):
print(123)
if (x == 3):
print('something')
break
for i in regular_for:
var1 = var2 + var3
async for x1 in print:
print('test LOAD_GLOBAL')
async for x2 in inner:
for x3 in regular:
async for x4 in inner2:
async for x5 in inner3:
async for x6 in inner4:
print('ridiculous nesting')
async for (q, w, e, r) in qwer:
u = 1 + 2 + 3
async for x4 in inner2:
async for x5 in inner3:
pass
print('outside loop')
print ('outside function')
# The following will LOAD_METHOD, not GET_AITER or GET_ANEXT.
# test.__anext__(iter)
# test.__aiter__(iter)

View File

@@ -0,0 +1,70 @@
async def a ( b , c ) : <EOL>
<INDENT>
async for b in c : <EOL>
<INDENT>
pass <EOL>
<OUTDENT>
<OUTDENT>
async def a ( b , c ) : <EOL>
<INDENT>
async for b in c : <EOL>
<INDENT>
continue <EOL>
<OUTDENT>
<OUTDENT>
async def a ( b , c ) : <EOL>
<INDENT>
async for b in c : <EOL>
<INDENT>
break <EOL>
<OUTDENT>
<OUTDENT>
async def time_for_some_fun ( ) : <EOL>
<INDENT>
async for x , y in myfunc ( c ) : <EOL>
<INDENT>
print ( 123 ) <EOL>
if x == 3 : <EOL>
<INDENT>
print ( 'something' ) <EOL>
break <EOL>
<OUTDENT>
for i in regular_for : <EOL>
<INDENT>
var1 = var2 + var3 <EOL>
async for x1 in print : <EOL>
<INDENT>
print ( 'test LOAD_GLOBAL' ) <EOL>
async for x2 in inner : <EOL>
<INDENT>
for x3 in regular : <EOL>
<INDENT>
async for x4 in inner2 : <EOL>
<INDENT>
async for x5 in inner3 : <EOL>
<INDENT>
async for x6 in inner4 : <EOL>
<INDENT>
print ( 'ridiculous nesting' ) <EOL>
<OUTDENT>
<OUTDENT>
<OUTDENT>
<OUTDENT>
<OUTDENT>
<OUTDENT>
<OUTDENT>
<OUTDENT>
async for q , w , e , r in qwer : <EOL>
<INDENT>
u = 6 <EOL>
<OUTDENT>
async for x4 in inner2 : <EOL>
<INDENT>
async for x5 in inner3 : <EOL>
<INDENT>
pass <EOL>
<OUTDENT>
<OUTDENT>
print ( 'outside loop' ) <EOL>
<OUTDENT>
print ( 'outside function' ) <EOL>