Implement better handling for async for.

Instead of ignoring boilerplate when decompiling `async for`, build the loop semantically by deconstructing the BLK_WHILE -> BLK_CONTAINER -> BLK_TRY that python generates when compiling an `async for` (see https://www.python.org/dev/peps/pep-0492/#asynchronous-iterators-and-async-for).
When reading GET_AITER, convert the BLK_WHILE into a BLK_ASYNCFOR in the same way as a `for` loop.
Ignore GET_ANEXT.
Use END_FINALLY as the trigger to finish building the BLK_ASYNCFOR:
-> Extract the loop variable from the contents of the BLK_TRY.
-> Throw away the BLK_TRY and BLK_CONTAINER, leave the BLK_ASYNCFOR on top of the stack.
This commit is contained in:
Aralox
2020-10-21 19:32:20 +11:00
parent 8933c9a4a9
commit 91a23f914d
5 changed files with 67 additions and 128 deletions

View File

@@ -1,5 +1,4 @@
#include "ASTNode.h"
#include <bytecode.h>
/* ASTNodeList */
void ASTNodeList::removeLast()
@@ -80,41 +79,3 @@ const char* ASTBlock::type_str() const
};
return s_type_strings[blktype()];
}
/* ASTIterBlock */
const int ASTIterBlock::ASYNCFOR_BOILER_READLOOPINDEX = 4;
const int ASTIterBlock::ASYNCFOR_BOILER_READLOOPCONTENTS = 11;
const int ASTIterBlock::ASYNCFOR_BOILER_OFFSETFROMEND = 12;
const int ASTIterBlock::ASYNCFOR_BOILER_FIRSTJUMP = 5;
const Pyc::Opcode ASTIterBlock::ASYNCFOR_BOILER_ALTJUMPOP = Pyc::JUMP_ABSOLUTE_A;
const std::vector<Pyc::Opcode> ASTIterBlock::ASYNCFOR_BOILERPLATE = {
// Ignore all these after GET_AITER.
Pyc::SETUP_EXCEPT_A,
Pyc::GET_ANEXT,
Pyc::LOAD_CONST_A, // None
Pyc::YIELD_FROM,
// Process ops from here to load index variable.
// Continue ignoring all these
Pyc::POP_BLOCK,
Pyc::JUMP_FORWARD_A, // or JUMP_ABSOLUTE if pass
Pyc::DUP_TOP,
Pyc::LOAD_GLOBAL_A, // StopAsyncIteration
Pyc::COMPARE_OP_A, // (<EXCEPTION MATCH>)
Pyc::POP_JUMP_IF_TRUE_A,
Pyc::END_FINALLY,
// Actual code inside the loop begins here. 'Preamble' before this, 'Trailer' after.
// After reading a JUMP at the address we are expecting, ignore the following:
Pyc::POP_TOP,
Pyc::POP_TOP,
Pyc::POP_TOP,
Pyc::POP_EXCEPT,
Pyc::POP_TOP
// Then a POP_BLOCK as per normal loop
};

View File

@@ -514,17 +514,9 @@ private:
bool m_negative;
};
namespace Pyc { enum Opcode : int; }
class ASTIterBlock : public ASTBlock {
public:
static const std::vector<Pyc::Opcode> ASYNCFOR_BOILERPLATE;
static const int ASYNCFOR_BOILER_READLOOPINDEX;
static const int ASYNCFOR_BOILER_READLOOPCONTENTS;
static const int ASYNCFOR_BOILER_OFFSETFROMEND;
static const int ASYNCFOR_BOILER_FIRSTJUMP;
static const Pyc::Opcode ASYNCFOR_BOILER_ALTJUMPOP;
ASTIterBlock(ASTBlock::BlkType blktype, int end, PycRef<ASTNode> iter)
: ASTBlock(blktype, end), m_iter(std::move(iter)), m_idx(), m_comp() { }

View File

@@ -18,11 +18,6 @@ static bool printDocstringAndGlobals = false;
/* Use this to keep track of whether we need to print a class or module docstring */
static bool printClassDocstring = true;
/* Used to track what boilerplate ops to ignore when inside an 'async for'. */
static int currentAsyncForOpIndex = -1;
static int currentAsyncForEndPos = 0;
PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
{
PycBuffer source(code->code()->value(), code->code()->length());
@@ -59,51 +54,6 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
curpos = pos;
bc_next(source, mod, opcode, operand, pos);
if (currentAsyncForOpIndex > -1) {
if (currentAsyncForOpIndex < ASTIterBlock::ASYNCFOR_BOILER_READLOOPCONTENTS)
{
// Ignore boilerplate ops - we don't need this information to reconstruct the 'async for'.
const auto currentExpectedBoilerplateOp = ASTIterBlock::ASYNCFOR_BOILERPLATE[currentAsyncForOpIndex];
if (opcode == currentExpectedBoilerplateOp
|| (currentAsyncForOpIndex == ASTIterBlock::ASYNCFOR_BOILER_FIRSTJUMP
&& opcode == ASTIterBlock::ASYNCFOR_BOILER_ALTJUMPOP))
{
currentAsyncForOpIndex++;
continue;
}
else if (currentAsyncForOpIndex == ASTIterBlock::ASYNCFOR_BOILER_READLOOPINDEX) {
// Resume processing ops to read in the loop variable.
}
else {
fprintf(stderr, "Unexpected opcode %s when decompiling 'async for' preamble. (pos, ind): (%d, %d)\n",
Pyc::OpcodeName(opcode & 0xFF), pos, currentAsyncForOpIndex);
currentAsyncForOpIndex = -1;
}
}
else if (currentAsyncForOpIndex < static_cast<int>(ASTIterBlock::ASYNCFOR_BOILERPLATE.size()))
{
auto lastBoilerPos = currentAsyncForEndPos - ASTIterBlock::ASYNCFOR_BOILER_OFFSETFROMEND;
if (pos > lastBoilerPos)
{
// We have just finished processing the JUMP_ABSOLUTE at the end of the loop - resume ignoring boilerplate.
if (opcode == ASTIterBlock::ASYNCFOR_BOILERPLATE[currentAsyncForOpIndex]) {
currentAsyncForOpIndex++;
continue;
}
else {
fprintf(stderr, "Unexpected opcode %s when decompiling 'async for' trailer. (pos, end, ind): (%d, %d, %d)\n",
Pyc::OpcodeName(opcode & 0xFF), pos, lastBoilerPos, currentAsyncForOpIndex);
currentAsyncForOpIndex = -1;
}
}
}
else {
currentAsyncForOpIndex = -1;
currentAsyncForEndPos = 0;
}
}
if (need_try && opcode != Pyc::SETUP_EXCEPT_A) {
need_try = false;
@@ -740,23 +690,61 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
curblock = blocks.top();
curblock->append(final.cast<ASTNode>());
isFinally = true;
} else if (curblock->blktype() == ASTBlock::BLK_EXCEPT) {
/* Turn it into an else statement. */
}
else if (curblock->blktype() == ASTBlock::BLK_EXCEPT) {
blocks.pop();
PycRef<ASTBlock> prev = curblock;
if (curblock->size() != 0) {
blocks.top()->append(curblock.cast<ASTNode>());
}
curblock = blocks.top();
if (curblock->end() != pos || curblock.cast<ASTContainerBlock>()->hasFinally()) {
PycRef<ASTBlock> elseblk = new ASTBlock(ASTBlock::BLK_ELSE, prev->end());
elseblk->init();
blocks.push(elseblk);
bool isAsyncFor = false;
if (blocks.top()->blktype() == ASTBlock::BLK_CONTAINER)
{
auto container = blocks.top();
blocks.pop();
auto asyncForBlock = blocks.top();
isAsyncFor = asyncForBlock->blktype() == ASTBlock::BLK_ASYNCFOR;
if (isAsyncFor)
{
auto tryBlock = container->nodes().front().cast<ASTBlock>();
if (!tryBlock->nodes().empty() && tryBlock->blktype() == ASTBlock::BLK_TRY)
{
auto store = tryBlock->nodes().front().cast<ASTStore>();
if (store)
{
asyncForBlock.cast<ASTIterBlock>()->setIndex(store->dest());
}
}
curblock = blocks.top();
stack = stack_hist.top();
stack_hist.pop();
if (!curblock->inited())
fprintf(stderr, "Error when decompiling 'async for'.\n");
}
else
{
blocks.push(container);
}
}
if (!isAsyncFor) {
if (curblock->size() != 0) {
blocks.top()->append(curblock.cast<ASTNode>());
}
curblock = blocks.top();
} else {
stack = stack_hist.top();
stack_hist.pop();
/* Turn it into an else statement. */
if (curblock->end() != pos || curblock.cast<ASTContainerBlock>()->hasFinally()) {
PycRef<ASTBlock> elseblk = new ASTBlock(ASTBlock::BLK_ELSE, prev->end());
elseblk->init();
blocks.push(elseblk);
curblock = blocks.top();
}
else {
stack = stack_hist.top();
stack_hist.pop();
}
}
}
@@ -845,16 +833,15 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
else {
fprintf(stderr, "Unsupported use of GET_AITER outside of SETUP_LOOP\n");
}
currentAsyncForEndPos = top->end();
PycRef<ASTIterBlock> forblk = new ASTIterBlock(ASTBlock::BLK_ASYNCFOR, top->end(), iter);
blocks.push(forblk.cast<ASTBlock>());
curblock = blocks.top();
stack.push(NULL);
currentAsyncForOpIndex = 0;
}
break;
case Pyc::GET_ANEXT:
break;
case Pyc::GET_AWAITABLE:
{
PycRef<ASTNode> object = stack.top();
@@ -1455,8 +1442,6 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
break;
}
PycRef<ASTBlock> tmp;
if (curblock->nodes().size() &&
curblock->nodes().back().type() == ASTNode::NODE_KEYWORD) {
curblock->removeLast();
@@ -1475,8 +1460,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n");
}
}
tmp = curblock;
PycRef<ASTBlock> tmp = curblock;
blocks.pop();
if (!blocks.empty())
@@ -1487,7 +1471,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
curblock->append(tmp.cast<ASTNode>());
}
if ((tmp->blktype() == ASTBlock::BLK_FOR || tmp->blktype() == ASTBlock::BLK_ASYNCFOR) && tmp->end() >= pos) {
if (tmp->blktype() == ASTBlock::BLK_FOR && tmp->end() >= pos) {
stack_hist.push(stack);
PycRef<ASTBlock> blkelse = new ASTBlock(ASTBlock::BLK_ELSE, tmp->end());
@@ -1908,7 +1892,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> seq = stack.top();
stack.pop();
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
if (curblock->blktype() == ASTBlock::BLK_FOR
&& !curblock->inited()) {
PycRef<ASTTuple> tuple = tup.cast<ASTTuple>();
if (tuple != NULL)
@@ -1934,7 +1918,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
break;
}
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
if (curblock->blktype() == ASTBlock::BLK_FOR
&& !curblock->inited()) {
curblock.cast<ASTIterBlock>()->setIndex(name);
} else if (curblock->blktype() == ASTBlock::BLK_WITH
@@ -1963,7 +1947,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> seq = stack.top();
stack.pop();
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
if (curblock->blktype() == ASTBlock::BLK_FOR
&& !curblock->inited()) {
PycRef<ASTTuple> tuple = tup.cast<ASTTuple>();
if (tuple != NULL)
@@ -1999,7 +1983,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> seq = stack.top();
stack.pop();
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
if (curblock->blktype() == ASTBlock::BLK_FOR
&& !curblock->inited()) {
PycRef<ASTTuple> tuple = tup.cast<ASTTuple>();
if (tuple != NULL)
@@ -2027,7 +2011,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> name = new ASTName(varname);
if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR)
if (curblock->blktype() == ASTBlock::BLK_FOR
&& !curblock->inited()) {
curblock.cast<ASTIterBlock>()->setIndex(name);
} else if (stack.top().type() == ASTNode::NODE_IMPORT) {
@@ -2193,8 +2177,10 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.pop();
// TODO: Support yielding into a non-null destination
PycRef<ASTNode> value = stack.top();
value->setProcessed();
curblock->append(new ASTReturn(value, ASTReturn::YIELD_FROM));
if (value) {
value->setProcessed();
curblock->append(new ASTReturn(value, ASTReturn::YIELD_FROM));
}
}
break;
case Pyc::YIELD_VALUE:

View File

@@ -4,7 +4,7 @@
namespace Pyc {
enum Opcode : int {
enum Opcode {
#define OPCODE(x) x,
#define OPCODE_A_FIRST(x) PYC_HAVE_ARG, x##_A = PYC_HAVE_ARG,
#define OPCODE_A(x) x##_A,

View File

@@ -21,7 +21,7 @@ break <EOL>
<OUTDENT>
async def time_for_some_fun ( ) : <EOL>
<INDENT>
async for x , y in myfunc ( c ) : <EOL>
async for ( x , y ) in myfunc ( c ) : <EOL>
<INDENT>
print ( 123 ) <EOL>
if x == 3 : <EOL>
@@ -54,7 +54,7 @@ print ( 'ridiculous nesting' ) <EOL>
<OUTDENT>
<OUTDENT>
<OUTDENT>
async for q , w , e , r in qwer : <EOL>
async for ( q , w , e , r ) in qwer : <EOL>
<INDENT>
u = 6 <EOL>
<OUTDENT>