From 8933c9a4a9d359082c36fedf545f19ce5fbba61b Mon Sep 17 00:00:00 2001 From: Aralox Date: Tue, 20 Oct 2020 20:39:23 +1100 Subject: [PATCH] Add support for `async for`. Issue #124 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ignore 'boilerplate' opcodes that set up the async iteration logic internally (https://www.python.org/dev/peps/pep-0492/#asynchronous-iterators-and-async-for) - We don’t need any of this as GET_AITER is enough to tell us that we are in an `async for`. --- ASTNode.cpp | 41 +++++++++++++- ASTNode.h | 10 +++- ASTree.cpp | 94 +++++++++++++++++++++++++++---- bytecode.h | 2 +- tests/compiled/async_for.3.7.pyc | Bin 0 -> 1471 bytes tests/input/async_for.py | 44 +++++++++++++++ tests/tokenized/async_for.txt | 70 +++++++++++++++++++++++ 7 files changed, 248 insertions(+), 13 deletions(-) create mode 100644 tests/compiled/async_for.3.7.pyc create mode 100644 tests/input/async_for.py create mode 100644 tests/tokenized/async_for.txt diff --git a/ASTNode.cpp b/ASTNode.cpp index d375b65..247183a 100644 --- a/ASTNode.cpp +++ b/ASTNode.cpp @@ -1,4 +1,5 @@ #include "ASTNode.h" +#include /* ASTNodeList */ void ASTNodeList::removeLast() @@ -75,7 +76,45 @@ const char* ASTBlock::type_str() const { static const char* s_type_strings[] = { "", "if", "else", "elif", "try", "CONTAINER", "except", - "finally", "while", "for", "with", + "finally", "while", "for", "with", "async for" }; return s_type_strings[blktype()]; } + + +/* ASTIterBlock */ +const int ASTIterBlock::ASYNCFOR_BOILER_READLOOPINDEX = 4; +const int ASTIterBlock::ASYNCFOR_BOILER_READLOOPCONTENTS = 11; +const int ASTIterBlock::ASYNCFOR_BOILER_OFFSETFROMEND = 12; +const int ASTIterBlock::ASYNCFOR_BOILER_FIRSTJUMP = 5; +const Pyc::Opcode ASTIterBlock::ASYNCFOR_BOILER_ALTJUMPOP = Pyc::JUMP_ABSOLUTE_A; + +const std::vector ASTIterBlock::ASYNCFOR_BOILERPLATE = { + // Ignore all these after GET_AITER. + Pyc::SETUP_EXCEPT_A, + Pyc::GET_ANEXT, + Pyc::LOAD_CONST_A, // None + Pyc::YIELD_FROM, + + // Process ops from here to load index variable. + + // Continue ignoring all these + Pyc::POP_BLOCK, + Pyc::JUMP_FORWARD_A, // or JUMP_ABSOLUTE if pass + Pyc::DUP_TOP, + Pyc::LOAD_GLOBAL_A, // StopAsyncIteration + Pyc::COMPARE_OP_A, // () + Pyc::POP_JUMP_IF_TRUE_A, + Pyc::END_FINALLY, + + // Actual code inside the loop begins here. 'Preamble' before this, 'Trailer' after. + + // After reading a JUMP at the address we are expecting, ignore the following: + Pyc::POP_TOP, + Pyc::POP_TOP, + Pyc::POP_TOP, + Pyc::POP_EXCEPT, + Pyc::POP_TOP + + // Then a POP_BLOCK as per normal loop +}; diff --git a/ASTNode.h b/ASTNode.h index 728c7f4..7eb7c84 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -465,7 +465,7 @@ public: enum BlkType { BLK_MAIN, BLK_IF, BLK_ELSE, BLK_ELIF, BLK_TRY, BLK_CONTAINER, BLK_EXCEPT, BLK_FINALLY, - BLK_WHILE, BLK_FOR, BLK_WITH + BLK_WHILE, BLK_FOR, BLK_WITH, BLK_ASYNCFOR }; ASTBlock(BlkType blktype, int end = 0, int inited = 0) @@ -514,9 +514,17 @@ private: bool m_negative; }; +namespace Pyc { enum Opcode : int; } class ASTIterBlock : public ASTBlock { public: + static const std::vector ASYNCFOR_BOILERPLATE; + static const int ASYNCFOR_BOILER_READLOOPINDEX; + static const int ASYNCFOR_BOILER_READLOOPCONTENTS; + static const int ASYNCFOR_BOILER_OFFSETFROMEND; + static const int ASYNCFOR_BOILER_FIRSTJUMP; + static const Pyc::Opcode ASYNCFOR_BOILER_ALTJUMPOP; + ASTIterBlock(ASTBlock::BlkType blktype, int end, PycRef iter) : ASTBlock(blktype, end), m_iter(std::move(iter)), m_idx(), m_comp() { } diff --git a/ASTree.cpp b/ASTree.cpp index 277c1b9..4653341 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -18,6 +18,11 @@ static bool printDocstringAndGlobals = false; /* Use this to keep track of whether we need to print a class or module docstring */ static bool printClassDocstring = true; +/* Used to track what boilerplate ops to ignore when inside an 'async for'. */ +static int currentAsyncForOpIndex = -1; +static int currentAsyncForEndPos = 0; + + PycRef BuildFromCode(PycRef code, PycModule* mod) { PycBuffer source(code->code()->value(), code->code()->length()); @@ -54,6 +59,51 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) curpos = pos; bc_next(source, mod, opcode, operand, pos); + + if (currentAsyncForOpIndex > -1) { + + if (currentAsyncForOpIndex < ASTIterBlock::ASYNCFOR_BOILER_READLOOPCONTENTS) + { + // Ignore boilerplate ops - we don't need this information to reconstruct the 'async for'. + const auto currentExpectedBoilerplateOp = ASTIterBlock::ASYNCFOR_BOILERPLATE[currentAsyncForOpIndex]; + if (opcode == currentExpectedBoilerplateOp + || (currentAsyncForOpIndex == ASTIterBlock::ASYNCFOR_BOILER_FIRSTJUMP + && opcode == ASTIterBlock::ASYNCFOR_BOILER_ALTJUMPOP)) + { + currentAsyncForOpIndex++; + continue; + } + else if (currentAsyncForOpIndex == ASTIterBlock::ASYNCFOR_BOILER_READLOOPINDEX) { + // Resume processing ops to read in the loop variable. + } + else { + fprintf(stderr, "Unexpected opcode %s when decompiling 'async for' preamble. (pos, ind): (%d, %d)\n", + Pyc::OpcodeName(opcode & 0xFF), pos, currentAsyncForOpIndex); + currentAsyncForOpIndex = -1; + } + } + else if (currentAsyncForOpIndex < static_cast(ASTIterBlock::ASYNCFOR_BOILERPLATE.size())) + { + auto lastBoilerPos = currentAsyncForEndPos - ASTIterBlock::ASYNCFOR_BOILER_OFFSETFROMEND; + if (pos > lastBoilerPos) + { + // We have just finished processing the JUMP_ABSOLUTE at the end of the loop - resume ignoring boilerplate. + if (opcode == ASTIterBlock::ASYNCFOR_BOILERPLATE[currentAsyncForOpIndex]) { + currentAsyncForOpIndex++; + continue; + } + else { + fprintf(stderr, "Unexpected opcode %s when decompiling 'async for' trailer. (pos, end, ind): (%d, %d, %d)\n", + Pyc::OpcodeName(opcode & 0xFF), pos, lastBoilerPos, currentAsyncForOpIndex); + currentAsyncForOpIndex = -1; + } + } + } + else { + currentAsyncForOpIndex = -1; + currentAsyncForEndPos = 0; + } + } if (need_try && opcode != Pyc::SETUP_EXCEPT_A) { need_try = false; @@ -782,6 +832,29 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(NULL); // We can totally hack this >_> } break; + case Pyc::GET_AITER: + { + // Logic very similar to FOR_ITER_A + PycRef iter = stack.top(); // Iterable + stack.pop(); + + PycRef top = blocks.top(); + if (top->blktype() == ASTBlock::BLK_WHILE) { + blocks.pop(); + } + else { + fprintf(stderr, "Unsupported use of GET_AITER outside of SETUP_LOOP\n"); + } + + currentAsyncForEndPos = top->end(); + PycRef forblk = new ASTIterBlock(ASTBlock::BLK_ASYNCFOR, top->end(), iter); + blocks.push(forblk.cast()); + curblock = blocks.top(); + + stack.push(NULL); + currentAsyncForOpIndex = 0; + } + break; case Pyc::GET_AWAITABLE: { PycRef object = stack.top(); @@ -1414,7 +1487,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) curblock->append(tmp.cast()); } - if (tmp->blktype() == ASTBlock::BLK_FOR && tmp->end() >= pos) { + if ((tmp->blktype() == ASTBlock::BLK_FOR || tmp->blktype() == ASTBlock::BLK_ASYNCFOR) && tmp->end() >= pos) { stack_hist.push(stack); PycRef blkelse = new ASTBlock(ASTBlock::BLK_ELSE, tmp->end()); @@ -1423,8 +1496,9 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } if (curblock->blktype() == ASTBlock::BLK_TRY - && tmp->blktype() != ASTBlock::BLK_FOR - && tmp->blktype() != ASTBlock::BLK_WHILE) { + && tmp->blktype() != ASTBlock::BLK_FOR + && tmp->blktype() != ASTBlock::BLK_ASYNCFOR + && tmp->blktype() != ASTBlock::BLK_WHILE) { stack = stack_hist.top(); stack_hist.pop(); @@ -1460,7 +1534,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } } - if (curblock->blktype() == ASTBlock::BLK_FOR + if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && curblock->end() == pos) { blocks.pop(); blocks.top()->append(curblock.cast()); @@ -1834,7 +1908,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef seq = stack.top(); stack.pop(); - if (curblock->blktype() == ASTBlock::BLK_FOR + if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && !curblock->inited()) { PycRef tuple = tup.cast(); if (tuple != NULL) @@ -1860,7 +1934,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) break; } - if (curblock->blktype() == ASTBlock::BLK_FOR + if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && !curblock->inited()) { curblock.cast()->setIndex(name); } else if (curblock->blktype() == ASTBlock::BLK_WITH @@ -1889,7 +1963,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef seq = stack.top(); stack.pop(); - if (curblock->blktype() == ASTBlock::BLK_FOR + if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && !curblock->inited()) { PycRef tuple = tup.cast(); if (tuple != NULL) @@ -1925,7 +1999,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef seq = stack.top(); stack.pop(); - if (curblock->blktype() == ASTBlock::BLK_FOR + if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && !curblock->inited()) { PycRef tuple = tup.cast(); if (tuple != NULL) @@ -1953,7 +2027,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) PycRef name = new ASTName(varname); - if (curblock->blktype() == ASTBlock::BLK_FOR + if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && !curblock->inited()) { curblock.cast()->setIndex(name); } else if (stack.top().type() == ASTNode::NODE_IMPORT) { @@ -2469,7 +2543,7 @@ void print_src(PycRef node, PycModule* mod) fputs(" ", pyc_output); print_src(blk.cast()->cond(), mod); - } else if (blk->blktype() == ASTBlock::BLK_FOR) { + } else if (blk->blktype() == ASTBlock::BLK_FOR || blk->blktype() == ASTBlock::BLK_ASYNCFOR) { fputs(" ", pyc_output); print_src(blk.cast()->index(), mod); fputs(" in ", pyc_output); diff --git a/bytecode.h b/bytecode.h index 06ca0c1..e2b54bb 100644 --- a/bytecode.h +++ b/bytecode.h @@ -4,7 +4,7 @@ namespace Pyc { -enum Opcode { +enum Opcode : int { #define OPCODE(x) x, #define OPCODE_A_FIRST(x) PYC_HAVE_ARG, x##_A = PYC_HAVE_ARG, #define OPCODE_A(x) x##_A, diff --git a/tests/compiled/async_for.3.7.pyc b/tests/compiled/async_for.3.7.pyc new file mode 100644 index 0000000000000000000000000000000000000000..679a23d6faa8adfa88042926bd6dfac0d38ebb26 GIT binary patch literal 1471 zcmb7E&1(}u6rY)FHk)+Q#!^sJB=jJrl%h%WAR@K|KOnS)3XO|3q}gpQ=jY=%Jl`tDWz zdxDT}C>%EilRH4(A`B)m=?h{IlZ|;|%oQ^U>(pYVdPpfTQyZk9HE}OCPecu&mnc_(pAM=B8h>#jsCfi60T#zI3XEfr z9ByxfM8e@o6!BD?{K4@*5S+~TUI4=Sq?`FC2$+Bq2q4Hd17i+Irsh1vFr>aCh@gj< zpxh`yk6Azhx=9=zz_7i{ka{~cA|Uyg>;rfXdyH2=PN2MwJ}wHS0(@&WU?M5hvm{8s zDieuF+Lf5chti{h9{kTD6|qak?C#cx|3+vPvh_Mp{h>%lEK@PdGFWblOhm86^p`3Jm#0NmF<{R1-%|!@ox$pFwBkbS_v}2RGwIex^E Y(@!91x7x1RYgxDDm!^T4rpSKbH}U5mq5uE@ literal 0 HcmV?d00001 diff --git a/tests/input/async_for.py b/tests/input/async_for.py new file mode 100644 index 0000000..6a8ba92 --- /dev/null +++ b/tests/input/async_for.py @@ -0,0 +1,44 @@ +async def a(b, c): + async for b in c: + pass + +async def a(b, c): + async for b in c: + continue + +async def a(b, c): + async for b in c: + break + +async def time_for_some_fun(): + async for (x, y) in myfunc(c): + print(123) + if (x == 3): + print('something') + break + + for i in regular_for: + var1 = var2 + var3 + async for x1 in print: + print('test LOAD_GLOBAL') + async for x2 in inner: + for x3 in regular: + async for x4 in inner2: + async for x5 in inner3: + async for x6 in inner4: + print('ridiculous nesting') + + async for (q, w, e, r) in qwer: + u = 1 + 2 + 3 + + async for x4 in inner2: + async for x5 in inner3: + pass + + print('outside loop') + +print ('outside function') + +# The following will LOAD_METHOD, not GET_AITER or GET_ANEXT. +# test.__anext__(iter) +# test.__aiter__(iter) diff --git a/tests/tokenized/async_for.txt b/tests/tokenized/async_for.txt new file mode 100644 index 0000000..c27788c --- /dev/null +++ b/tests/tokenized/async_for.txt @@ -0,0 +1,70 @@ +async def a ( b , c ) : + +async for b in c : + +pass + + +async def a ( b , c ) : + +async for b in c : + +continue + + +async def a ( b , c ) : + +async for b in c : + +break + + +async def time_for_some_fun ( ) : + +async for x , y in myfunc ( c ) : + +print ( 123 ) +if x == 3 : + +print ( 'something' ) +break + +for i in regular_for : + +var1 = var2 + var3 +async for x1 in print : + +print ( 'test LOAD_GLOBAL' ) +async for x2 in inner : + +for x3 in regular : + +async for x4 in inner2 : + +async for x5 in inner3 : + +async for x6 in inner4 : + +print ( 'ridiculous nesting' ) + + + + + + + + +async for q , w , e , r in qwer : + +u = 6 + +async for x4 in inner2 : + +async for x5 in inner3 : + +pass + + +print ( 'outside loop' ) + +print ( 'outside function' )