#include #include #include #include "ASTree.h" #include "FastStack.h" #include "pyc_numeric.h" #include "bytecode.h" // This must be a triple quote (''' or """), to handle interpolated string literals containing the opposite quote style. // E.g. f'''{"interpolated "123' literal"}''' -> valid. // E.g. f"""{"interpolated "123' literal"}""" -> valid. // E.g. f'{"interpolated "123' literal"}' -> invalid, unescaped quotes in literal. // E.g. f'{"interpolated \"123\' literal"}' -> invalid, f-string expression does not allow backslash. // NOTE: Nested f-strings not supported. #define F_STRING_QUOTE "'''" static void append_to_chain_store(PycRef &chainStore, PycRef item, FastStack& stack, PycRef &curblock); /* Use this to determine if an error occurred (and therefore, if we should * avoid cleaning the output tree) */ static bool cleanBuild; /* Use this to prevent printing return keywords and newlines in lambdas. */ static bool inLambda = false; /* Use this to keep track of whether we need to print out any docstring and * the list of global variables that we are using (such as inside a function). */ static bool printDocstringAndGlobals = false; /* Use this to keep track of whether we need to print a class or module docstring */ static bool printClassDocstring = true; // shortcut for all top/pop calls static PycRef StackPopTop(FastStack& stack) { const auto node{ stack.top() }; stack.pop(); return node; } /* compiler generates very, VERY similar byte code for if/else statement block and if-expression * statement * if a: b = 1 * else: b = 2 * expression: * b = 1 if a else 2 * (see for instance https://stackoverflow.com/a/52202007) * here, try to guess if just finished else statement is part of if-expression (ternary operator) * if it is, remove statements from the block and put a ternary node on top of stack */ static void CheckIfExpr(FastStack& stack, PycRef curblock) { if (stack.empty()) return; if (curblock->nodes().size() < 2) return; auto rit = curblock->nodes().crbegin(); // the last is "else" block, the one before should be "if" (could be "for", ...) if ((*rit)->type() != ASTNode::NODE_BLOCK || (*rit).cast()->blktype() != ASTBlock::BLK_ELSE) return; ++rit; if ((*rit)->type() != ASTNode::NODE_BLOCK || (*rit).cast()->blktype() != ASTBlock::BLK_IF) return; auto else_expr = StackPopTop(stack); curblock->removeLast(); auto if_block = curblock->nodes().back(); auto if_expr = StackPopTop(stack); curblock->removeLast(); stack.push(new ASTTernary(std::move(if_block), std::move(if_expr), std::move(else_expr))); } PycRef BuildFromCode(PycRef code, PycModule* mod) { PycBuffer source(code->code()->value(), code->code()->length()); FastStack stack((mod->majorVer() == 1) ? 20 : code->stackSize()); stackhist_t stack_hist; std::stack > blocks; PycRef defblock = new ASTBlock(ASTBlock::BLK_MAIN); defblock->init(); PycRef curblock = defblock; blocks.push(defblock); int opcode, operand; int curpos = 0; int pos = 0; int unpack = 0; bool else_pop = false; bool need_try = false; bool variable_annotations = false; while (!source.atEof()) { #if defined(BLOCK_DEBUG) || defined(STACK_DEBUG) fprintf(stderr, "%-7d", pos); #ifdef STACK_DEBUG fprintf(stderr, "%-5d", (unsigned int)stack_hist.size() + 1); #endif #ifdef BLOCK_DEBUG for (unsigned int i = 0; i < blocks.size(); i++) fprintf(stderr, " "); fprintf(stderr, "%s (%d)", curblock->type_str(), curblock->end()); #endif fprintf(stderr, "\n"); #endif curpos = pos; bc_next(source, mod, opcode, operand, pos); if (need_try && opcode != Pyc::SETUP_EXCEPT_A) { need_try = false; /* Store the current stack for the except/finally statement(s) */ stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, curblock->end(), true); blocks.push(tryblock); curblock = blocks.top(); } else if (else_pop && opcode != Pyc::JUMP_FORWARD_A && opcode != Pyc::JUMP_IF_FALSE_A && opcode != Pyc::JUMP_IF_FALSE_OR_POP_A && opcode != Pyc::POP_JUMP_IF_FALSE_A && opcode != Pyc::JUMP_IF_TRUE_A && opcode != Pyc::JUMP_IF_TRUE_OR_POP_A && opcode != Pyc::POP_JUMP_IF_TRUE_A && opcode != Pyc::POP_BLOCK) { else_pop = false; PycRef prev = curblock; while (prev->end() < pos && prev->blktype() != ASTBlock::BLK_MAIN) { if (prev->blktype() != ASTBlock::BLK_CONTAINER) { if (prev->end() == 0) { break; } /* We want to keep the stack the same, but we need to pop * a level off the history. */ //stack = stack_hist.top(); if (!stack_hist.empty()) stack_hist.pop(); } blocks.pop(); if (blocks.empty()) break; curblock = blocks.top(); curblock->append(prev.cast()); prev = curblock; CheckIfExpr(stack, curblock); } } switch (opcode) { case Pyc::BINARY_OP_A: { ASTBinary::BinOp op = ASTBinary::from_binary_op(operand); if (op == ASTBinary::BIN_INVALID) fprintf(stderr, "Unsupported `BINARY_OP` operand value: %d\n", operand); PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); stack.push(new ASTBinary(left, right, op)); } break; case Pyc::BINARY_ADD: case Pyc::BINARY_AND: case Pyc::BINARY_DIVIDE: case Pyc::BINARY_FLOOR_DIVIDE: case Pyc::BINARY_LSHIFT: case Pyc::BINARY_MODULO: case Pyc::BINARY_MULTIPLY: case Pyc::BINARY_OR: case Pyc::BINARY_POWER: case Pyc::BINARY_RSHIFT: case Pyc::BINARY_SUBTRACT: case Pyc::BINARY_TRUE_DIVIDE: case Pyc::BINARY_XOR: case Pyc::BINARY_MATRIX_MULTIPLY: case Pyc::INPLACE_ADD: case Pyc::INPLACE_AND: case Pyc::INPLACE_DIVIDE: case Pyc::INPLACE_FLOOR_DIVIDE: case Pyc::INPLACE_LSHIFT: case Pyc::INPLACE_MODULO: case Pyc::INPLACE_MULTIPLY: case Pyc::INPLACE_OR: case Pyc::INPLACE_POWER: case Pyc::INPLACE_RSHIFT: case Pyc::INPLACE_SUBTRACT: case Pyc::INPLACE_TRUE_DIVIDE: case Pyc::INPLACE_XOR: case Pyc::INPLACE_MATRIX_MULTIPLY: { ASTBinary::BinOp op = ASTBinary::from_opcode(opcode); if (op == ASTBinary::BIN_INVALID) throw std::runtime_error("Unhandled opcode from ASTBinary::from_opcode"); PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); stack.push(new ASTBinary(left, right, op)); } break; case Pyc::BINARY_SUBSCR: { PycRef subscr = stack.top(); stack.pop(); PycRef src = stack.top(); stack.pop(); stack.push(new ASTSubscr(src, subscr)); } break; case Pyc::BREAK_LOOP: curblock->append(new ASTKeyword(ASTKeyword::KW_BREAK)); break; case Pyc::BUILD_CLASS: { PycRef class_code = stack.top(); stack.pop(); PycRef bases = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); stack.push(new ASTClass(class_code, bases, name)); } break; case Pyc::BUILD_FUNCTION: { PycRef fun_code = stack.top(); stack.pop(); stack.push(new ASTFunction(fun_code, {}, {})); } break; case Pyc::BUILD_LIST_A: { ASTList::value_t values; for (int i=0; iverCompare(3, 5) >= 0) { auto map = new ASTMap; for (int i=0; i value = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); map->add(key, value); } stack.push(map); } else { if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } stack.push(new ASTMap()); } break; case Pyc::BUILD_CONST_KEY_MAP_A: // Top of stack will be a tuple of keys. // Values will start at TOS - 1. { PycRef keys = stack.top(); stack.pop(); ASTConstMap::values_t values; values.reserve(operand); for (int i = 0; i < operand; ++i) { PycRef value = stack.top(); stack.pop(); values.push_back(value); } stack.push(new ASTConstMap(keys, values)); } break; case Pyc::STORE_MAP: { PycRef key = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); PycRef map = stack.top().cast(); map->add(key, value); } break; case Pyc::BUILD_SLICE_A: { if (operand == 2) { PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } if (start == NULL && end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE0)); } else if (start == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE2, start, end)); } else if (end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, start, end)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, start, end)); } } else if (operand == 3) { PycRef step = stack.top(); stack.pop(); PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } if (step.type() == ASTNode::NODE_OBJECT && step.cast()->object() == Pyc_None) { step = NULL; } /* We have to do this as a slice where one side is another slice */ /* [[a:b]:c] */ if (start == NULL && end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE0)); } else if (start == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE2, start, end)); } else if (end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, start, end)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, start, end)); } PycRef lhs = stack.top(); stack.pop(); if (step == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, lhs, step)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, lhs, step)); } } } break; case Pyc::BUILD_STRING_A: { // Nearly identical logic to BUILD_LIST ASTList::value_t values; for (int i = 0; i < operand; i++) { values.push_front(stack.top()); stack.pop(); } stack.push(new ASTJoinedStr(values)); } break; case Pyc::BUILD_TUPLE_A: { ASTTuple::value_t values; values.resize(operand); for (int i=0; i> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; /* Test for the load build class function */ stack_hist.push(stack); int basecnt = 0; ASTTuple::value_t bases; bases.resize(basecnt); PycRef TOS = stack.top(); int TOS_type = TOS.type(); // bases are NODE_NAME at TOS while (TOS_type == ASTNode::NODE_NAME) { bases.resize(basecnt + 1); bases[basecnt] = TOS; basecnt++; stack.pop(); TOS = stack.top(); TOS_type = TOS.type(); } // qualified name is PycString at TOS PycRef name = stack.top(); stack.pop(); PycRef function = stack.top(); stack.pop(); PycRef loadbuild = stack.top(); stack.pop(); int loadbuild_type = loadbuild.type(); if (loadbuild_type == ASTNode::NODE_LOADBUILDCLASS) { PycRef call = new ASTCall(function, pparamList, kwparamList); stack.push(new ASTClass(call, new ASTTuple(bases), name)); stack_hist.pop(); break; } else { stack = stack_hist.top(); stack_hist.pop(); } for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i param = stack.top(); stack.pop(); if (param.type() == ASTNode::NODE_FUNCTION) { PycRef fun_code = param.cast()->code(); PycRef code_src = fun_code.cast()->object().cast(); PycRef function_name = code_src->name(); if (function_name->isEqual("")) { pparamList.push_front(param); } else { // Decorator used PycRef decor_name = new ASTName(function_name); curblock->append(new ASTStore(param, decor_name)); pparamList.push_front(decor_name); } } else { pparamList.push_front(param); } } PycRef func = stack.top(); stack.pop(); if (opcode == Pyc::CALL_A && stack.top() == nullptr) stack.pop(); stack.push(new ASTCall(func, pparamList, kwparamList)); } break; case Pyc::CALL_FUNCTION_VAR_A: { PycRef var = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setVar(var); stack.push(call); } break; case Pyc::CALL_FUNCTION_KW_A: { PycRef kw = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setKW(kw); stack.push(call); } break; case Pyc::CALL_FUNCTION_VAR_KW_A: { PycRef kw = stack.top(); stack.pop(); PycRef var = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setKW(kw); call.cast()->setVar(var); stack.push(call); } break; case Pyc::CALL_METHOD_A: { ASTCall::pparam_t pparamList; for (int i = 0; i < operand; i++) { PycRef param = stack.top(); stack.pop(); if (param.type() == ASTNode::NODE_FUNCTION) { PycRef fun_code = param.cast()->code(); PycRef code_src = fun_code.cast()->object().cast(); PycRef function_name = code_src->name(); if (function_name->isEqual("")) { pparamList.push_front(param); } else { // Decorator used PycRef decor_name = new ASTName(function_name); curblock->append(new ASTStore(param, decor_name)); pparamList.push_front(decor_name); } } else { pparamList.push_front(param); } } PycRef func = stack.top(); stack.pop(); stack.push(new ASTCall(func, pparamList, ASTCall::kwparam_t())); } break; case Pyc::CONTINUE_LOOP_A: curblock->append(new ASTKeyword(ASTKeyword::KW_CONTINUE)); break; case Pyc::COMPARE_OP_A: { PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); stack.push(new ASTCompare(left, right, operand)); } break; case Pyc::CONTAINS_OP_A: { PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); // The operand will be 0 for 'in' and 1 for 'not in'. stack.push(new ASTCompare(left, right, operand ? ASTCompare::CMP_NOT_IN : ASTCompare::CMP_IN)); } break; case Pyc::DELETE_ATTR_A: { PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR))); } break; case Pyc::DELETE_GLOBAL_A: code->markGlobal(code->getName(operand)); /* Fall through */ case Pyc::DELETE_NAME_A: { PycRef varname = code->getName(operand); if (varname->length() >= 2 && varname->value()[0] == '_' && varname->value()[1] == '[') { /* Don't show deletes that are a result of list comps. */ break; } PycRef name = new ASTName(varname); curblock->append(new ASTDelete(name)); } break; case Pyc::DELETE_FAST_A: { PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); if (name.cast()->name()->value()[0] == '_' && name.cast()->name()->value()[1] == '[') { /* Don't show deletes that are a result of list comps. */ break; } curblock->append(new ASTDelete(name)); } break; case Pyc::DELETE_SLICE_0: { PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE0)))); } break; case Pyc::DELETE_SLICE_1: { PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE1, upper)))); } break; case Pyc::DELETE_SLICE_2: { PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE2, NULL, lower)))); } break; case Pyc::DELETE_SLICE_3: { PycRef lower = stack.top(); stack.pop(); PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE3, upper, lower)))); } break; case Pyc::DELETE_SUBSCR: { PycRef key = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, key))); } break; case Pyc::DUP_TOP: { if (stack.top().type() == PycObject::TYPE_NULL) { stack.push(stack.top()); } else if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { auto chainstore = stack.top(); stack.pop(); stack.push(stack.top()); stack.push(chainstore); } else { stack.push(stack.top()); ASTNodeList::list_t targets; stack.push(new ASTChainStore(targets, stack.top())); } } break; case Pyc::DUP_TOP_TWO: { PycRef first = stack.top(); stack.pop(); PycRef second = stack.top(); stack.push(first); stack.push(second); stack.push(first); } break; case Pyc::DUP_TOPX_A: { std::stack > first; std::stack > second; for (int i = 0; i < operand; i++) { PycRef node = stack.top(); stack.pop(); first.push(node); second.push(node); } while (first.size()) { stack.push(first.top()); first.pop(); } while (second.size()) { stack.push(second.top()); second.pop(); } } break; case Pyc::END_FINALLY: { bool isFinally = false; if (curblock->blktype() == ASTBlock::BLK_FINALLY) { PycRef final = curblock; blocks.pop(); stack = stack_hist.top(); stack_hist.pop(); curblock = blocks.top(); curblock->append(final.cast()); isFinally = true; } else if (curblock->blktype() == ASTBlock::BLK_EXCEPT) { blocks.pop(); PycRef prev = curblock; bool isUninitAsyncFor = false; if (blocks.top()->blktype() == ASTBlock::BLK_CONTAINER) { auto container = blocks.top(); blocks.pop(); auto asyncForBlock = blocks.top(); isUninitAsyncFor = asyncForBlock->blktype() == ASTBlock::BLK_ASYNCFOR && !asyncForBlock->inited(); if (isUninitAsyncFor) { auto tryBlock = container->nodes().front().cast(); if (!tryBlock->nodes().empty() && tryBlock->blktype() == ASTBlock::BLK_TRY) { auto store = tryBlock->nodes().front().try_cast(); if (store) { asyncForBlock.cast()->setIndex(store->dest()); } } curblock = blocks.top(); stack = stack_hist.top(); stack_hist.pop(); if (!curblock->inited()) fprintf(stderr, "Error when decompiling 'async for'.\n"); } else { blocks.push(container); } } if (!isUninitAsyncFor) { if (curblock->size() != 0) { blocks.top()->append(curblock.cast()); } curblock = blocks.top(); /* Turn it into an else statement. */ if (curblock->end() != pos || curblock.cast()->hasFinally()) { PycRef elseblk = new ASTBlock(ASTBlock::BLK_ELSE, prev->end()); elseblk->init(); blocks.push(elseblk); curblock = blocks.top(); } else { stack = stack_hist.top(); stack_hist.pop(); } } } if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { /* This marks the end of the except block(s). */ PycRef cont = curblock.cast(); if (!cont->hasFinally() || isFinally) { /* If there's no finally block, pop the container. */ blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } } } break; case Pyc::EXEC_STMT: { if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef loc = stack.top(); stack.pop(); PycRef glob = stack.top(); stack.pop(); PycRef stmt = stack.top(); stack.pop(); curblock->append(new ASTExec(stmt, glob, loc)); } break; case Pyc::FOR_ITER_A: { PycRef iter = stack.top(); // Iterable stack.pop(); /* Pop it? Don't pop it? */ int end; bool comprehension = false; // before 3.8, there is a SETUP_LOOP instruction with block start and end position, // the operand is usually a jump to a POP_BLOCK instruction // after 3.8, block extent has to be inferred implicitly; the operand is a jump to a position after the for block if (mod->majorVer() == 3 && mod->minorVer() >= 8) { end = operand; if (mod->verCompare(3, 10) >= 0) end *= sizeof(uint16_t); // // BPO-27129 end += pos; comprehension = strcmp(code->name()->value(), "") == 0; } else { PycRef top = blocks.top(); end = top->end(); // block end position from SETUP_LOOP if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); } else { comprehension = true; } } PycRef forblk = new ASTIterBlock(ASTBlock::BLK_FOR, curpos, end, iter); forblk->setComprehension(comprehension); blocks.push(forblk.cast()); curblock = blocks.top(); stack.push(NULL); } break; case Pyc::FOR_LOOP_A: { PycRef curidx = stack.top(); // Current index stack.pop(); PycRef iter = stack.top(); // Iterable stack.pop(); bool comprehension = false; PycRef top = blocks.top(); if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); } else { comprehension = true; } PycRef forblk = new ASTIterBlock(ASTBlock::BLK_FOR, curpos, top->end(), iter); forblk->setComprehension(comprehension); blocks.push(forblk.cast()); curblock = blocks.top(); /* Python Docs say: "push the sequence, the incremented counter, and the current item onto the stack." */ stack.push(iter); stack.push(curidx); stack.push(NULL); // We can totally hack this >_> } break; case Pyc::GET_AITER: { // Logic similar to FOR_ITER_A PycRef iter = stack.top(); // Iterable stack.pop(); PycRef top = blocks.top(); if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); PycRef forblk = new ASTIterBlock(ASTBlock::BLK_ASYNCFOR, curpos, top->end(), iter); blocks.push(forblk.cast()); curblock = blocks.top(); stack.push(nullptr); } else { fprintf(stderr, "Unsupported use of GET_AITER outside of SETUP_LOOP\n"); } } break; case Pyc::GET_ANEXT: break; case Pyc::FORMAT_VALUE_A: { auto conversion_flag = static_cast(operand); switch (conversion_flag) { case ASTFormattedValue::ConversionFlag::NONE: case ASTFormattedValue::ConversionFlag::STR: case ASTFormattedValue::ConversionFlag::REPR: case ASTFormattedValue::ConversionFlag::ASCII: { auto val = stack.top(); stack.pop(); stack.push(new ASTFormattedValue(val, conversion_flag, nullptr)); } break; case ASTFormattedValue::ConversionFlag::FMTSPEC: { auto format_spec = stack.top(); stack.pop(); auto val = stack.top(); stack.pop(); stack.push(new ASTFormattedValue(val, conversion_flag, format_spec)); } break; default: fprintf(stderr, "Unsupported FORMAT_VALUE_A conversion flag: %d\n", operand); } } break; case Pyc::GET_AWAITABLE: { PycRef object = stack.top(); stack.pop(); stack.push(new ASTAwaitable(object)); } break; case Pyc::GET_ITER: case Pyc::GET_YIELD_FROM_ITER: /* We just entirely ignore this */ break; case Pyc::IMPORT_NAME_A: if (mod->majorVer() == 1) { stack.push(new ASTImport(new ASTName(code->getName(operand)), NULL)); } else { PycRef fromlist = stack.top(); stack.pop(); if (mod->verCompare(2, 5) >= 0) stack.pop(); // Level -- we don't care stack.push(new ASTImport(new ASTName(code->getName(operand)), fromlist)); } break; case Pyc::IMPORT_FROM_A: stack.push(new ASTName(code->getName(operand))); break; case Pyc::IMPORT_STAR: { PycRef import = stack.top(); stack.pop(); curblock->append(new ASTStore(import, NULL)); } break; case Pyc::IS_OP_A: { PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); // The operand will be 0 for 'is' and 1 for 'is not'. stack.push(new ASTCompare(left, right, operand ? ASTCompare::CMP_IS_NOT : ASTCompare::CMP_IS)); } break; case Pyc::JUMP_IF_FALSE_A: case Pyc::JUMP_IF_TRUE_A: case Pyc::JUMP_IF_FALSE_OR_POP_A: case Pyc::JUMP_IF_TRUE_OR_POP_A: case Pyc::POP_JUMP_IF_FALSE_A: case Pyc::POP_JUMP_IF_TRUE_A: { PycRef cond = stack.top(); PycRef ifblk; int popped = ASTCondBlock::UNINITED; if (opcode == Pyc::POP_JUMP_IF_FALSE_A || opcode == Pyc::POP_JUMP_IF_TRUE_A) { /* Pop condition before the jump */ stack.pop(); popped = ASTCondBlock::PRE_POPPED; } /* Store the current stack for the else statement(s) */ stack_hist.push(stack); if (opcode == Pyc::JUMP_IF_FALSE_OR_POP_A || opcode == Pyc::JUMP_IF_TRUE_OR_POP_A) { /* Pop condition only if condition is met */ stack.pop(); popped = ASTCondBlock::POPPED; } /* "Jump if true" means "Jump if not false" */ bool neg = opcode == Pyc::JUMP_IF_TRUE_A || opcode == Pyc::JUMP_IF_TRUE_OR_POP_A || opcode == Pyc::POP_JUMP_IF_TRUE_A; int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (opcode == Pyc::JUMP_IF_FALSE_A || opcode == Pyc::JUMP_IF_TRUE_A) { /* Offset is relative in these cases */ offs = pos + operand; } if (cond.type() == ASTNode::NODE_COMPARE && cond.cast()->op() == ASTCompare::CMP_EXCEPTION) { if (curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock.cast()->cond() == NULL) { blocks.pop(); curblock = blocks.top(); stack_hist.pop(); } ifblk = new ASTCondBlock(ASTBlock::BLK_EXCEPT, offs, cond.cast()->right(), false); } else if (curblock->blktype() == ASTBlock::BLK_ELSE && curblock->size() == 0) { /* Collapse into elif statement */ blocks.pop(); stack = stack_hist.top(); stack_hist.pop(); ifblk = new ASTCondBlock(ASTBlock::BLK_ELIF, offs, cond, neg); } else if (curblock->size() == 0 && !curblock->inited() && curblock->blktype() == ASTBlock::BLK_WHILE) { /* The condition for a while loop */ PycRef top = blocks.top(); blocks.pop(); ifblk = new ASTCondBlock(top->blktype(), offs, cond, neg); /* We don't store the stack for loops! Pop it! */ stack_hist.pop(); } else if (curblock->size() == 0 && curblock->end() <= offs && (curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELIF || curblock->blktype() == ASTBlock::BLK_WHILE)) { PycRef newcond; PycRef top = curblock.cast(); PycRef cond1 = top->cond(); blocks.pop(); if (curblock->blktype() == ASTBlock::BLK_WHILE) { stack_hist.pop(); } else { FastStack s_top = stack_hist.top(); stack_hist.pop(); stack_hist.pop(); stack_hist.push(s_top); } if (curblock->end() == offs || (curblock->end() == curpos && !top->negative())) { /* if blah and blah */ newcond = new ASTBinary(cond1, cond, ASTBinary::BIN_LOG_AND); } else { /* if blah or blah */ newcond = new ASTBinary(cond1, cond, ASTBinary::BIN_LOG_OR); } ifblk = new ASTCondBlock(top->blktype(), offs, newcond, neg); } else if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension() && mod->verCompare(2, 7) >= 0) { /* Comprehension condition */ curblock.cast()->setCondition(cond); stack_hist.pop(); // TODO: Handle older python versions, where condition // is laid out a little differently. break; } else { /* Plain old if statement */ ifblk = new ASTCondBlock(ASTBlock::BLK_IF, offs, cond, neg); } if (popped) ifblk->init(popped); blocks.push(ifblk.cast()); curblock = blocks.top(); } break; case Pyc::JUMP_ABSOLUTE_A: { int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (offs < pos) { if (curblock->blktype() == ASTBlock::BLK_FOR) { bool is_jump_to_start = offs == curblock.cast()->start(); bool should_pop_for_block = curblock.cast()->isComprehension(); // in v3.8, SETUP_LOOP is deprecated and for blocks aren't terminated by POP_BLOCK, so we add them here bool should_add_for_block = mod->majorVer() == 3 && mod->minorVer() >= 8 && is_jump_to_start && !curblock.cast()->isComprehension(); if (should_pop_for_block || should_add_for_block) { PycRef top = stack.top(); if (top.type() == ASTNode::NODE_COMPREHENSION) { PycRef comp = top.cast(); comp->addGenerator(curblock.cast()); } PycRef tmp = curblock; blocks.pop(); curblock = blocks.top(); if (should_add_for_block) { curblock->append(tmp.cast()); } } } else if (curblock->blktype() == ASTBlock::BLK_ELSE) { stack = stack_hist.top(); stack_hist.pop(); blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); if (curblock->blktype() == ASTBlock::BLK_CONTAINER && !curblock.cast()->hasFinally()) { blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); } } else { curblock->append(new ASTKeyword(ASTKeyword::KW_CONTINUE)); } /* We're in a loop, this jumps back to the start */ /* I think we'll just ignore this case... */ break; // Bad idea? Probably! } if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (cont->hasExcept() && pos < cont->except()) { PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, 0, NULL, false); except->init(); blocks.push(except); curblock = blocks.top(); } break; } stack = stack_hist.top(); stack_hist.pop(); PycRef prev = curblock; PycRef nil; bool push = true; do { blocks.pop(); blocks.top()->append(prev.cast()); if (prev->blktype() == ASTBlock::BLK_IF || prev->blktype() == ASTBlock::BLK_ELIF) { if (push) { stack_hist.push(stack); } PycRef next = new ASTBlock(ASTBlock::BLK_ELSE, blocks.top()->end()); if (prev->inited() == ASTCondBlock::PRE_POPPED) { next->init(ASTCondBlock::PRE_POPPED); } blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_EXCEPT) { if (push) { stack_hist.push(stack); } PycRef next = new ASTCondBlock(ASTBlock::BLK_EXCEPT, blocks.top()->end(), NULL, false); next->init(); blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_ELSE) { /* Special case */ prev = blocks.top(); if (!push) { stack = stack_hist.top(); stack_hist.pop(); } push = false; } else { prev = nil; } } while (prev != nil); curblock = blocks.top(); } break; case Pyc::JUMP_FORWARD_A: { int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (cont->hasExcept()) { stack_hist.push(stack); curblock->setEnd(pos+offs); PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); except->init(); blocks.push(except); curblock = blocks.top(); } break; } if (!stack_hist.empty()) { if (stack.empty()) // if it's part of if-expression, TOS at the moment is the result of "if" part stack = stack_hist.top(); stack_hist.pop(); } PycRef prev = curblock; PycRef nil; bool push = true; do { blocks.pop(); if (!blocks.empty()) blocks.top()->append(prev.cast()); if (prev->blktype() == ASTBlock::BLK_IF || prev->blktype() == ASTBlock::BLK_ELIF) { if (offs == 0) { prev = nil; continue; } if (push) { stack_hist.push(stack); } PycRef next = new ASTBlock(ASTBlock::BLK_ELSE, pos+offs); if (prev->inited() == ASTCondBlock::PRE_POPPED) { next->init(ASTCondBlock::PRE_POPPED); } blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_EXCEPT) { if (offs == 0) { prev = nil; continue; } if (push) { stack_hist.push(stack); } PycRef next = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); next->init(); blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_ELSE) { /* Special case */ prev = blocks.top(); if (!push) { stack = stack_hist.top(); stack_hist.pop(); } push = false; if (prev->blktype() == ASTBlock::BLK_MAIN) { /* Something went out of control! */ prev = nil; } } else if (prev->blktype() == ASTBlock::BLK_TRY && prev->end() < pos+offs) { /* Need to add an except/finally block */ stack = stack_hist.top(); stack.pop(); if (blocks.top()->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = blocks.top().cast(); if (cont->hasExcept()) { if (push) { stack_hist.push(stack); } PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); except->init(); blocks.push(except); } } else { fprintf(stderr, "Something TERRIBLE happened!!\n"); } prev = nil; } else { prev = nil; } } while (prev != nil); curblock = blocks.top(); if (curblock->blktype() == ASTBlock::BLK_EXCEPT) { curblock->setEnd(pos+offs); } } break; case Pyc::LIST_APPEND: case Pyc::LIST_APPEND_A: { PycRef value = stack.top(); stack.pop(); PycRef list = stack.top(); if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension()) { stack.pop(); stack.push(new ASTComprehension(value)); } else { stack.push(new ASTSubscr(list, value)); /* Total hack */ } } break; case Pyc::SET_UPDATE_A: { PycRef rhs = stack.top(); stack.pop(); PycRef lhs = stack.top().cast(); stack.pop(); if (rhs.type() != ASTNode::NODE_OBJECT) { fprintf(stderr, "Unsupported argument found for SET_UPDATE\n"); break; } // I've only ever seen this be a TYPE_FROZENSET, but let's be careful... PycRef obj = rhs.cast()->object(); if (obj->type() != PycObject::TYPE_FROZENSET) { fprintf(stderr, "Unsupported argument type found for SET_UPDATE\n"); break; } ASTSet::value_t result = lhs->values(); for (const auto& it : obj.cast()->values()) { result.push_back(new ASTObject(it)); } stack.push(new ASTSet(result)); } break; case Pyc::LIST_EXTEND_A: { PycRef rhs = stack.top(); stack.pop(); PycRef lhs = stack.top().cast(); stack.pop(); if (rhs.type() != ASTNode::NODE_OBJECT) { fprintf(stderr, "Unsupported argument found for LIST_EXTEND\n"); break; } // I've only ever seen this be a SMALL_TUPLE, but let's be careful... PycRef obj = rhs.cast()->object(); if (obj->type() != PycObject::TYPE_TUPLE && obj->type() != PycObject::TYPE_SMALL_TUPLE) { fprintf(stderr, "Unsupported argument type found for LIST_EXTEND\n"); break; } ASTList::value_t result = lhs->values(); for (const auto& it : obj.cast()->values()) { result.push_back(new ASTObject(it)); } stack.push(new ASTList(result)); } break; case Pyc::LOAD_ATTR_A: { PycRef name = stack.top(); if (name.type() != ASTNode::NODE_IMPORT) { stack.pop(); stack.push(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR)); } } break; case Pyc::LOAD_BUILD_CLASS: stack.push(new ASTLoadBuildClass(new PycObject())); break; case Pyc::LOAD_CLOSURE_A: /* Ignore this */ break; case Pyc::LOAD_CONST_A: { PycRef t_ob = new ASTObject(code->getConst(operand)); if ((t_ob->object().type() == PycObject::TYPE_TUPLE || t_ob->object().type() == PycObject::TYPE_SMALL_TUPLE) && !t_ob->object().cast()->values().size()) { ASTTuple::value_t values; stack.push(new ASTTuple(values)); } else if (t_ob->object().type() == PycObject::TYPE_NONE) { stack.push(NULL); } else { stack.push(t_ob.cast()); } } break; case Pyc::LOAD_DEREF_A: stack.push(new ASTName(code->getCellVar(mod, operand))); break; case Pyc::LOAD_FAST_A: if (mod->verCompare(1, 3) < 0) stack.push(new ASTName(code->getName(operand))); else stack.push(new ASTName(code->getLocal(operand))); break; case Pyc::LOAD_GLOBAL_A: stack.push(new ASTName(code->getName(operand))); break; case Pyc::LOAD_LOCALS: stack.push(new ASTNode(ASTNode::NODE_LOCALS)); break; case Pyc::STORE_LOCALS: stack.pop(); break; case Pyc::LOAD_METHOD_A: { // Behave like LOAD_ATTR PycRef name = stack.top(); stack.pop(); stack.push(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR)); } break; case Pyc::LOAD_NAME_A: stack.push(new ASTName(code->getName(operand))); break; case Pyc::MAKE_CLOSURE_A: case Pyc::MAKE_FUNCTION_A: { PycRef fun_code = stack.top(); stack.pop(); /* Test for the qualified name of the function (at TOS) */ int tos_type = fun_code.cast()->object().type(); if (tos_type != PycObject::TYPE_CODE && tos_type != PycObject::TYPE_CODE2) { fun_code = stack.top(); stack.pop(); } ASTFunction::defarg_t defArgs, kwDefArgs; const int defCount = operand & 0xFF; const int kwDefCount = (operand >> 8) & 0xFF; for (int i = 0; i < defCount; ++i) { defArgs.push_front(stack.top()); stack.pop(); } for (int i = 0; i < kwDefCount; ++i) { kwDefArgs.push_front(stack.top()); stack.pop(); } stack.push(new ASTFunction(fun_code, defArgs, kwDefArgs)); } break; case Pyc::NOP: break; case Pyc::POP_BLOCK: { if (curblock->blktype() == ASTBlock::BLK_CONTAINER || curblock->blktype() == ASTBlock::BLK_FINALLY) { /* These should only be popped by an END_FINALLY */ break; } if (curblock->blktype() == ASTBlock::BLK_WITH) { // This should only be popped by a WITH_CLEANUP break; } if (curblock->nodes().size() && curblock->nodes().back().type() == ASTNode::NODE_KEYWORD) { curblock->removeLast(); } if (curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELIF || curblock->blktype() == ASTBlock::BLK_ELSE || curblock->blktype() == ASTBlock::BLK_TRY || curblock->blktype() == ASTBlock::BLK_EXCEPT || curblock->blktype() == ASTBlock::BLK_FINALLY) { if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } else { fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n"); } } PycRef tmp = curblock; blocks.pop(); if (!blocks.empty()) curblock = blocks.top(); if (!(tmp->blktype() == ASTBlock::BLK_ELSE && tmp->nodes().size() == 0)) { curblock->append(tmp.cast()); } if (tmp->blktype() == ASTBlock::BLK_FOR && tmp->end() >= pos) { stack_hist.push(stack); PycRef blkelse = new ASTBlock(ASTBlock::BLK_ELSE, tmp->end()); blocks.push(blkelse); curblock = blocks.top(); } if (curblock->blktype() == ASTBlock::BLK_TRY && tmp->blktype() != ASTBlock::BLK_FOR && tmp->blktype() != ASTBlock::BLK_ASYNCFOR && tmp->blktype() != ASTBlock::BLK_WHILE) { stack = stack_hist.top(); stack_hist.pop(); tmp = curblock; blocks.pop(); curblock = blocks.top(); if (!(tmp->blktype() == ASTBlock::BLK_ELSE && tmp->nodes().size() == 0)) { curblock->append(tmp.cast()); } } if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (tmp->blktype() == ASTBlock::BLK_ELSE && !cont->hasFinally()) { /* Pop the container */ blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } else if ((tmp->blktype() == ASTBlock::BLK_ELSE && cont->hasFinally()) || (tmp->blktype() == ASTBlock::BLK_TRY && !cont->hasExcept())) { /* Add the finally block */ stack_hist.push(stack); PycRef final = new ASTBlock(ASTBlock::BLK_FINALLY, 0, true); blocks.push(final); curblock = blocks.top(); } } if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && curblock->end() == pos) { blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); } } break; case Pyc::POP_EXCEPT: /* Do nothing. */ break; case Pyc::POP_TOP: { PycRef value = stack.top(); stack.pop(); if (!curblock->inited()) { if (curblock->blktype() == ASTBlock::BLK_WITH) { curblock.cast()->setExpr(value); } else { curblock->init(); } break; } else if (value == nullptr || value->processed()) { break; } curblock->append(value); if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension()) { /* This relies on some really uncertain logic... * If it's a comprehension, the only POP_TOP should be * a call to append the iter to the list. */ if (value.type() == ASTNode::NODE_CALL) { auto& pparams = value.cast()->pparams(); if (!pparams.empty()) { PycRef res = pparams.front(); stack.push(new ASTComprehension(res)); } } } } break; case Pyc::PRINT_ITEM: { PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == nullptr && !printNode->eol()) printNode->add(stack.top()); else curblock->append(new ASTPrint(stack.top())); stack.pop(); } break; case Pyc::PRINT_ITEM_TO: { PycRef stream = stack.top(); stack.pop(); PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == stream && !printNode->eol()) printNode->add(stack.top()); else curblock->append(new ASTPrint(stack.top(), stream)); stack.pop(); stream->setProcessed(); } break; case Pyc::PRINT_NEWLINE: { PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == nullptr && !printNode->eol()) printNode->setEol(true); else curblock->append(new ASTPrint(nullptr)); stack.pop(); } break; case Pyc::PRINT_NEWLINE_TO: { PycRef stream = stack.top(); stack.pop(); PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == stream && !printNode->eol()) printNode->setEol(true); else curblock->append(new ASTPrint(nullptr, stream)); stack.pop(); stream->setProcessed(); } break; case Pyc::RAISE_VARARGS_A: { ASTRaise::param_t paramList; for (int i = 0; i < operand; i++) { paramList.push_front(stack.top()); stack.pop(); } curblock->append(new ASTRaise(paramList)); if ((curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELSE) && stack_hist.size() && (mod->verCompare(2, 6) >= 0)) { stack = stack_hist.top(); stack_hist.pop(); PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); bc_next(source, mod, opcode, operand, pos); } } break; case Pyc::RETURN_VALUE: { PycRef value = stack.top(); stack.pop(); curblock->append(new ASTReturn(value)); if ((curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELSE) && stack_hist.size() && (mod->verCompare(2, 6) >= 0)) { stack = stack_hist.top(); stack_hist.pop(); PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); bc_next(source, mod, opcode, operand, pos); } } break; case Pyc::ROT_TWO: { PycRef one = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef two = stack.top(); stack.pop(); stack.push(one); stack.push(two); } break; case Pyc::ROT_THREE: { PycRef one = stack.top(); stack.pop(); PycRef two = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef three = stack.top(); stack.pop(); stack.push(one); stack.push(three); stack.push(two); } break; case Pyc::ROT_FOUR: { PycRef one = stack.top(); stack.pop(); PycRef two = stack.top(); stack.pop(); PycRef three = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef four = stack.top(); stack.pop(); stack.push(one); stack.push(four); stack.push(three); stack.push(two); } break; case Pyc::SET_LINENO_A: // Ignore break; case Pyc::SETUP_WITH_A: { PycRef withblock = new ASTWithBlock(pos+operand); blocks.push(withblock); curblock = blocks.top(); } break; case Pyc::WITH_CLEANUP: { // Stack top should be a None. Ignore it. PycRef none = stack.top(); stack.pop(); if (none != NULL) { fprintf(stderr, "Something TERRIBLE happened!\n"); break; } if (curblock->blktype() == ASTBlock::BLK_WITH && curblock->end() == curpos) { PycRef with = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(with.cast()); } else { fprintf(stderr, "Something TERRIBLE happened! No matching with block found for WITH_CLEANUP at %d\n", curpos); } } break; case Pyc::SETUP_EXCEPT_A: { if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { curblock.cast()->setExcept(pos+operand); } else { PycRef next = new ASTContainerBlock(0, pos+operand); blocks.push(next.cast()); } /* Store the current stack for the except/finally statement(s) */ stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, pos+operand, true); blocks.push(tryblock.cast()); curblock = blocks.top(); need_try = false; } break; case Pyc::SETUP_FINALLY_A: { PycRef next = new ASTContainerBlock(pos+operand); blocks.push(next.cast()); curblock = blocks.top(); need_try = true; } break; case Pyc::SETUP_LOOP_A: { PycRef next = new ASTCondBlock(ASTBlock::BLK_WHILE, pos+operand, NULL, false); blocks.push(next.cast()); curblock = blocks.top(); } break; case Pyc::SLICE_0: { PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE0); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_1: { PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE1, lower); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_2: { PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE2, NULL, upper); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_3: { PycRef upper = stack.top(); stack.pop(); PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE3, lower, upper); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::STORE_ATTR_A: { if (unpack) { PycRef name = stack.top(); stack.pop(); PycRef attr = new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(attr); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef name = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); PycRef attr = new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR); if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, attr, stack, curblock); } else { curblock->append(new ASTStore(value, attr)); } } } break; case Pyc::STORE_DEREF_A: { if (unpack) { PycRef name = new ASTName(code->getCellVar(mod, operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef name = new ASTName(code->getCellVar(mod, operand)); if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } } break; case Pyc::STORE_FAST_A: { if (unpack) { PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); if (name.cast()->name()->value()[0] == '_' && name.cast()->name()->value()[1] == '[') { /* Don't show stores of list comp append objects. */ break; } if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { curblock.cast()->setIndex(name); } else if (curblock->blktype() == ASTBlock::BLK_WITH && !curblock->inited()) { curblock.cast()->setExpr(value); curblock.cast()->setVar(name); } else if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } } break; case Pyc::STORE_GLOBAL_A: { PycRef name = new ASTName(code->getName(operand)); if (unpack) { PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } /* Mark the global as used */ code->markGlobal(name.cast()->name()); } break; case Pyc::STORE_NAME_A: { if (unpack) { PycRef name = new ASTName(code->getName(operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef varname = code->getName(operand); if (varname->length() >= 2 && varname->value()[0] == '_' && varname->value()[1] == '[') { /* Don't show stores of list comp append objects. */ break; } // Return private names back to their original name const std::string class_prefix = std::string("_") + code->name()->strValue(); if (varname->startsWith(class_prefix + std::string("__"))) varname->setValue(varname->strValue().substr(class_prefix.size())); PycRef name = new ASTName(varname); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { curblock.cast()->setIndex(name); } else if (stack.top().type() == ASTNode::NODE_IMPORT) { PycRef import = stack.top().cast(); import->add_store(new ASTStore(value, name)); } else if (curblock->blktype() == ASTBlock::BLK_WITH && !curblock->inited()) { curblock.cast()->setExpr(value); curblock.cast()->setVar(name); } else if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); if (value.type() == ASTNode::NODE_INVALID) break; } } } break; case Pyc::STORE_SLICE_0: { PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE0)))); } break; case Pyc::STORE_SLICE_1: { PycRef upper = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE1, upper)))); } break; case Pyc::STORE_SLICE_2: { PycRef lower = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE2, NULL, lower)))); } break; case Pyc::STORE_SLICE_3: { PycRef lower = stack.top(); stack.pop(); PycRef upper = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE3, upper, lower)))); } break; case Pyc::STORE_SUBSCR: { if (unpack) { PycRef subscr = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef save = new ASTSubscr(dest, subscr); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(save); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef subscr = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef src = stack.top(); stack.pop(); // If variable annotations are enabled, we'll need to check for them here. // Python handles a varaible annotation by setting: // __annotations__['var-name'] = type const bool found_annotated_var = (variable_annotations && dest->type() == ASTNode::Type::NODE_NAME && dest.cast()->name()->isEqual("__annotations__")); if (found_annotated_var) { // Annotations can be done alone or as part of an assignment. // In the case of an assignment, we'll see a NODE_STORE on the stack. if (!curblock->nodes().empty() && curblock->nodes().back()->type() == ASTNode::Type::NODE_STORE) { // Replace the existing NODE_STORE with a new one that includes the annotation. PycRef store = curblock->nodes().back().cast(); curblock->removeLast(); curblock->append(new ASTStore(store->src(), new ASTAnnotatedVar(subscr, src))); } else { curblock->append(new ASTAnnotatedVar(subscr, src)); } } else { if (dest.type() == ASTNode::NODE_MAP) { dest.cast()->add(subscr, src); } else if (src.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(src, new ASTSubscr(dest, subscr), stack, curblock); } else { curblock->append(new ASTStore(src, new ASTSubscr(dest, subscr))); } } } } break; case Pyc::UNARY_CALL: { PycRef func = stack.top(); stack.pop(); stack.push(new ASTCall(func, ASTCall::pparam_t(), ASTCall::kwparam_t())); } break; case Pyc::UNARY_CONVERT: { PycRef name = stack.top(); stack.pop(); stack.push(new ASTConvert(name)); } break; case Pyc::UNARY_INVERT: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_INVERT)); } break; case Pyc::UNARY_NEGATIVE: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_NEGATIVE)); } break; case Pyc::UNARY_NOT: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_NOT)); } break; case Pyc::UNARY_POSITIVE: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_POSITIVE)); } break; case Pyc::UNPACK_LIST_A: case Pyc::UNPACK_TUPLE_A: case Pyc::UNPACK_SEQUENCE_A: { unpack = operand; if (unpack > 0) { ASTTuple::value_t vals; stack.push(new ASTTuple(vals)); } else { // Unpack zero values and assign it to top of stack or for loop variable. // E.g. [] = TOS / for [] in X ASTTuple::value_t vals; auto tup = new ASTTuple(vals); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { tup->setRequireParens(true); curblock.cast()->setIndex(tup); } else if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { auto chainStore = stack.top(); stack.pop(); append_to_chain_store(chainStore, tup, stack, curblock); } else { curblock->append(new ASTStore(stack.top(), tup)); stack.pop(); } } } break; case Pyc::YIELD_FROM: { PycRef dest = stack.top(); stack.pop(); // TODO: Support yielding into a non-null destination PycRef value = stack.top(); if (value) { value->setProcessed(); curblock->append(new ASTReturn(value, ASTReturn::YIELD_FROM)); } } break; case Pyc::YIELD_VALUE: { PycRef value = stack.top(); stack.pop(); curblock->append(new ASTReturn(value, ASTReturn::YIELD)); } break; case Pyc::SETUP_ANNOTATIONS: variable_annotations = true; break; case Pyc::PRECALL_A: case Pyc::RESUME_A: /* We just entirely ignore this / no-op */ break; case Pyc::CACHE: /* These "fake" opcodes are used as placeholders for optimizing certain opcodes in Python 3.11+. Since we have no need for that during disassembly/decompilation, we can just treat these as no-ops. */ break; case Pyc::PUSH_NULL: stack.push(nullptr); break; default: fprintf(stderr, "Unsupported opcode: %s\n", Pyc::OpcodeName(opcode & 0xFF)); cleanBuild = false; return new ASTNodeList(defblock->nodes()); } else_pop = ( (curblock->blktype() == ASTBlock::BLK_ELSE) || (curblock->blktype() == ASTBlock::BLK_IF) || (curblock->blktype() == ASTBlock::BLK_ELIF) ) && (curblock->end() == pos); } if (stack_hist.size()) { fputs("Warning: Stack history is not empty!\n", stderr); while (stack_hist.size()) { stack_hist.pop(); } } if (blocks.size() > 1) { fputs("Warning: block stack is not empty!\n", stderr); while (blocks.size() > 1) { PycRef tmp = blocks.top(); blocks.pop(); blocks.top()->append(tmp.cast()); } } cleanBuild = true; return new ASTNodeList(defblock->nodes()); } static void append_to_chain_store(PycRef &chainStore, PycRef item, FastStack& stack, PycRef &curblock) { stack.pop(); // ignore identical source object. chainStore.cast()->append(item); if (stack.top().type() == PycObject::TYPE_NULL) { curblock->append(chainStore); } else { stack.push(chainStore); } } static int cmp_prec(PycRef parent, PycRef child) { /* Determine whether the parent has higher precedence than therefore child, so we don't flood the source code with extraneous parens. Else we'd have expressions like (((a + b) + c) + d) when therefore equivalent, a + b + c + d would suffice. */ if (parent.type() == ASTNode::NODE_UNARY && parent.cast()->op() == ASTUnary::UN_NOT) return 1; // Always parenthesize not(x) if (child.type() == ASTNode::NODE_BINARY) { PycRef binChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) return binChild->op() - parent.cast()->op(); else if (parent.type() == ASTNode::NODE_COMPARE) return (binChild->op() == ASTBinary::BIN_LOG_AND || binChild->op() == ASTBinary::BIN_LOG_OR) ? 1 : -1; else if (parent.type() == ASTNode::NODE_UNARY) return (binChild->op() == ASTBinary::BIN_POWER) ? -1 : 1; } else if (child.type() == ASTNode::NODE_UNARY) { PycRef unChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) { PycRef binParent = parent.cast(); if (binParent->op() == ASTBinary::BIN_LOG_AND || binParent->op() == ASTBinary::BIN_LOG_OR) return -1; else if (unChild->op() == ASTUnary::UN_NOT) return 1; else if (binParent->op() == ASTBinary::BIN_POWER) return 1; else return -1; } else if (parent.type() == ASTNode::NODE_COMPARE) { return (unChild->op() == ASTUnary::UN_NOT) ? 1 : -1; } else if (parent.type() == ASTNode::NODE_UNARY) { return unChild->op() - parent.cast()->op(); } } else if (child.type() == ASTNode::NODE_COMPARE) { PycRef cmpChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) return (parent.cast()->op() == ASTBinary::BIN_LOG_AND || parent.cast()->op() == ASTBinary::BIN_LOG_OR) ? -1 : 1; else if (parent.type() == ASTNode::NODE_COMPARE) return cmpChild->op() - parent.cast()->op(); else if (parent.type() == ASTNode::NODE_UNARY) return (parent.cast()->op() == ASTUnary::UN_NOT) ? -1 : 1; } /* For normal nodes, don't parenthesize anything */ return -1; } static void print_ordered(PycRef parent, PycRef child, PycModule* mod, std::ostream& pyc_output) { if (child.type() == ASTNode::NODE_BINARY || child.type() == ASTNode::NODE_COMPARE) { if (cmp_prec(parent, child) > 0) { pyc_output << "("; print_src(child, mod, pyc_output); pyc_output << ")"; } else { print_src(child, mod, pyc_output); } } else if (child.type() == ASTNode::NODE_UNARY) { if (cmp_prec(parent, child) > 0) { pyc_output << "("; print_src(child, mod, pyc_output); pyc_output << ")"; } else { print_src(child, mod, pyc_output); } } else { print_src(child, mod, pyc_output); } } static void start_line(int indent, std::ostream& pyc_output) { if (inLambda) return; for (int i=0; i blk, PycModule* mod, std::ostream& pyc_output) { ASTBlock::list_t lines = blk->nodes(); if (lines.size() == 0) { PycRef pass = new ASTKeyword(ASTKeyword::KW_PASS); start_line(cur_indent, pyc_output); print_src(pass, mod, pyc_output); } for (auto ln = lines.cbegin(); ln != lines.cend();) { if ((*ln).cast().type() != ASTNode::NODE_NODELIST) { start_line(cur_indent, pyc_output); } print_src(*ln, mod, pyc_output); if (++ln != lines.end()) { end_line(pyc_output); } } } void print_formatted_value(PycRef formatted_value, PycModule* mod, std::ostream& pyc_output) { pyc_output << "{"; print_src(formatted_value->val(), mod, pyc_output); switch (formatted_value->conversion()) { case ASTFormattedValue::ConversionFlag::NONE: break; case ASTFormattedValue::ConversionFlag::STR: pyc_output << "!s"; break; case ASTFormattedValue::ConversionFlag::REPR: pyc_output << "!r"; break; case ASTFormattedValue::ConversionFlag::ASCII: pyc_output << "!a"; break; case ASTFormattedValue::ConversionFlag::FMTSPEC: pyc_output << ":" << formatted_value->format_spec().cast()->object().cast()->value(); break; default: fprintf(stderr, "Unsupported NODE_FORMATTEDVALUE conversion flag: %d\n", formatted_value->conversion()); } pyc_output << "}"; } void print_src(PycRef node, PycModule* mod, std::ostream &pyc_output) { if (node == NULL) { pyc_output << "None"; cleanBuild = true; return; } switch (node->type()) { case ASTNode::NODE_BINARY: case ASTNode::NODE_COMPARE: { PycRef bin = node.cast(); print_ordered(node, bin->left(), mod, pyc_output); pyc_output << bin->op_str(); print_ordered(node, bin->right(), mod, pyc_output); } break; case ASTNode::NODE_UNARY: { PycRef un = node.cast(); pyc_output << un->op_str(); print_ordered(node, un->operand(), mod, pyc_output); } break; case ASTNode::NODE_CALL: { PycRef call = node.cast(); print_src(call->func(), mod, pyc_output); pyc_output << "("; bool first = true; for (const auto& param : call->pparams()) { if (!first) pyc_output << ", "; print_src(param, mod, pyc_output); first = false; } for (const auto& param : call->kwparams()) { if (!first) pyc_output << ", "; if (param.first.type() == ASTNode::NODE_NAME) { pyc_output << param.first.cast()->name()->value() << " = "; } else { PycRef str_name = param.first.cast()->object().cast(); pyc_output << str_name->value() << " = "; } print_src(param.second, mod, pyc_output); first = false; } if (call->hasVar()) { if (!first) pyc_output << ", "; pyc_output << "*"; print_src(call->var(), mod, pyc_output); first = false; } if (call->hasKW()) { if (!first) pyc_output << ", "; pyc_output << "**"; print_src(call->kw(), mod, pyc_output); first = false; } pyc_output << ")"; } break; case ASTNode::NODE_DELETE: { pyc_output << "del "; print_src(node.cast()->value(), mod, pyc_output); } break; case ASTNode::NODE_EXEC: { PycRef exec = node.cast(); pyc_output << "exec "; print_src(exec->statement(), mod, pyc_output); if (exec->globals() != NULL) { pyc_output << " in "; print_src(exec->globals(), mod, pyc_output); if (exec->locals() != NULL && exec->globals() != exec->locals()) { pyc_output << ", "; print_src(exec->locals(), mod, pyc_output); } } } break; case ASTNode::NODE_FORMATTEDVALUE: pyc_output << "f" F_STRING_QUOTE; print_formatted_value(node.cast(), mod, pyc_output); pyc_output << F_STRING_QUOTE; break; case ASTNode::NODE_JOINEDSTR: pyc_output << "f" F_STRING_QUOTE; for (const auto& val : node.cast()->values()) { switch (val.type()) { case ASTNode::NODE_FORMATTEDVALUE: print_formatted_value(val.cast(), mod, pyc_output); break; case ASTNode::NODE_OBJECT: // When printing a piece of the f-string, keep the quote style consistent. // This avoids problems when ''' or """ is part of the string. print_const(val.cast()->object(), mod, F_STRING_QUOTE, pyc_output); break; default: fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", val.type()); } } pyc_output << F_STRING_QUOTE; break; case ASTNode::NODE_KEYWORD: pyc_output << node.cast()->word_str(); break; case ASTNode::NODE_LIST: { pyc_output << "["; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); print_src(val, mod, pyc_output); first = false; } cur_indent--; pyc_output << "]"; } break; case ASTNode::NODE_SET: { pyc_output << "{"; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); print_src(val, mod, pyc_output); first = false; } cur_indent--; pyc_output << "}"; } break; case ASTNode::NODE_COMPREHENSION: { PycRef comp = node.cast(); pyc_output << "[ "; print_src(comp->result(), mod, pyc_output); for (const auto& gen : comp->generators()) { pyc_output << " for "; print_src(gen->index(), mod, pyc_output); pyc_output << " in "; print_src(gen->iter(), mod, pyc_output); if (gen->condition()) { pyc_output << " if "; print_src(gen->condition(), mod, pyc_output); } } pyc_output << " ]"; } break; case ASTNode::NODE_MAP: { pyc_output << "{"; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); print_src(val.first, mod, pyc_output); pyc_output << ": "; print_src(val.second, mod, pyc_output); first = false; } cur_indent--; pyc_output << " }"; } break; case ASTNode::NODE_CONST_MAP: { PycRef const_map = node.cast(); PycTuple::value_t keys = const_map->keys().cast()->object().cast()->values(); ASTConstMap::values_t values = const_map->values(); auto map = new ASTMap; for (const auto& key : keys) { // Values are pushed onto the stack in reverse order. PycRef value = values.back(); values.pop_back(); map->add(new ASTObject(key), value); } print_src(map, mod, pyc_output); } break; case ASTNode::NODE_NAME: pyc_output << node.cast()->name()->value(); break; case ASTNode::NODE_NODELIST: { cur_indent++; for (const auto& ln : node.cast()->nodes()) { if (ln.cast().type() != ASTNode::NODE_NODELIST) { start_line(cur_indent, pyc_output); } print_src(ln, mod, pyc_output); end_line(pyc_output); } cur_indent--; } break; case ASTNode::NODE_BLOCK: { PycRef blk = node.cast(); if (blk->blktype() == ASTBlock::BLK_ELSE && blk->size() == 0) break; if (blk->blktype() == ASTBlock::BLK_CONTAINER) { end_line(pyc_output); print_block(blk, mod, pyc_output); end_line(pyc_output); break; } pyc_output << blk->type_str(); if (blk->blktype() == ASTBlock::BLK_IF || blk->blktype() == ASTBlock::BLK_ELIF || blk->blktype() == ASTBlock::BLK_WHILE) { if (blk.cast()->negative()) pyc_output << " not "; else pyc_output << " "; print_src(blk.cast()->cond(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_FOR || blk->blktype() == ASTBlock::BLK_ASYNCFOR) { pyc_output << " "; print_src(blk.cast()->index(), mod, pyc_output); pyc_output << " in "; print_src(blk.cast()->iter(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_EXCEPT && blk.cast()->cond() != NULL) { pyc_output << " "; print_src(blk.cast()->cond(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_WITH) { pyc_output << " "; print_src(blk.cast()->expr(), mod, pyc_output); PycRef var = blk.try_cast()->var(); if (var != NULL) { pyc_output << " as "; print_src(var, mod, pyc_output); } } pyc_output << ":\n"; cur_indent++; print_block(blk, mod, pyc_output); cur_indent--; } break; case ASTNode::NODE_OBJECT: { PycRef obj = node.cast()->object(); if (obj.type() == PycObject::TYPE_CODE) { PycRef code = obj.cast(); decompyle(code, mod, pyc_output); } else { print_const(obj, mod, nullptr, pyc_output); } } break; case ASTNode::NODE_PRINT: { pyc_output << "print "; bool first = true; if (node.cast()->stream() != nullptr) { pyc_output << ">>"; print_src(node.cast()->stream(), mod, pyc_output); first = false; } for (const auto& val : node.cast()->values()) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } if (!node.cast()->eol()) pyc_output << ","; } break; case ASTNode::NODE_RAISE: { PycRef raise = node.cast(); pyc_output << "raise "; bool first = true; for (const auto& param : raise->params()) { if (!first) pyc_output << ", "; print_src(param, mod, pyc_output); first = false; } } break; case ASTNode::NODE_RETURN: { PycRef ret = node.cast(); PycRef value = ret->value(); if (!inLambda) { switch (ret->rettype()) { case ASTReturn::RETURN: pyc_output << "return "; break; case ASTReturn::YIELD: pyc_output << "yield "; break; case ASTReturn::YIELD_FROM: if (value.type() == ASTNode::NODE_AWAITABLE) { pyc_output << "await "; value = value.cast()->expression(); } else { pyc_output << "yield from "; } break; } } print_src(value, mod, pyc_output); } break; case ASTNode::NODE_SLICE: { PycRef slice = node.cast(); if (slice->op() & ASTSlice::SLICE1) { print_src(slice->left(), mod, pyc_output); } pyc_output << ":"; if (slice->op() & ASTSlice::SLICE2) { print_src(slice->right(), mod, pyc_output); } } break; case ASTNode::NODE_IMPORT: { PycRef import = node.cast(); if (import->stores().size()) { ASTImport::list_t stores = import->stores(); pyc_output << "from "; if (import->name().type() == ASTNode::NODE_IMPORT) print_src(import->name().cast()->name(), mod, pyc_output); else print_src(import->name(), mod, pyc_output); pyc_output << " import "; if (stores.size() == 1) { auto src = stores.front()->src(); auto dest = stores.front()->dest(); print_src(src, mod, pyc_output); if (src.cast()->name()->value() != dest.cast()->name()->value()) { pyc_output << " as "; print_src(dest, mod, pyc_output); } } else { bool first = true; for (const auto& st : stores) { if (!first) pyc_output << ", "; print_src(st->src(), mod, pyc_output); first = false; if (st->src().cast()->name()->value() != st->dest().cast()->name()->value()) { pyc_output << " as "; print_src(st->dest(), mod, pyc_output); } } } } else { pyc_output << "import "; print_src(import->name(), mod, pyc_output); } } break; case ASTNode::NODE_FUNCTION: { /* Actual named functions are NODE_STORE with a name */ pyc_output << "(lambda "; PycRef code = node.cast()->code(); PycRef code_src = code.cast()->object().cast(); ASTFunction::defarg_t defargs = node.cast()->defargs(); ASTFunction::defarg_t kwdefargs = node.cast()->kwdefargs(); auto da = defargs.cbegin(); int narg = 0; for (int i=0; iargCount(); i++) { if (narg) pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->argCount() - i) <= (int)defargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } da = kwdefargs.cbegin(); if (code_src->kwOnlyArgCount() != 0) { pyc_output << (narg == 0 ? "*" : ", *"); for (int i = 0; i < code_src->argCount(); i++) { pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->kwOnlyArgCount() - i) <= (int)kwdefargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } } pyc_output << ": "; inLambda = true; print_src(code, mod, pyc_output); inLambda = false; pyc_output << ")"; } break; case ASTNode::NODE_STORE: { PycRef src = node.cast()->src(); PycRef dest = node.cast()->dest(); if (src.type() == ASTNode::NODE_FUNCTION) { PycRef code = src.cast()->code(); PycRef code_src = code.cast()->object().cast(); bool isLambda = false; if (strcmp(code_src->name()->value(), "") == 0) { pyc_output << "\n"; start_line(cur_indent, pyc_output); print_src(dest, mod, pyc_output); pyc_output << " = lambda "; isLambda = true; } else { pyc_output << "\n"; start_line(cur_indent, pyc_output); if (code_src->flags() & PycCode::CO_COROUTINE) pyc_output << "async "; pyc_output << "def "; print_src(dest, mod, pyc_output); pyc_output << "("; } ASTFunction::defarg_t defargs = src.cast()->defargs(); ASTFunction::defarg_t kwdefargs = src.cast()->kwdefargs(); auto da = defargs.cbegin(); int narg = 0; for (int i = 0; i < code_src->argCount(); ++i) { if (narg) pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->argCount() - i) <= (int)defargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } da = kwdefargs.cbegin(); if (code_src->kwOnlyArgCount() != 0) { pyc_output << (narg == 0 ? "*" : ", *"); for (int i = 0; i < code_src->kwOnlyArgCount(); ++i) { pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->kwOnlyArgCount() - i) <= (int)kwdefargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } } if (code_src->flags() & PycCode::CO_VARARGS) { if (narg) pyc_output << ", "; pyc_output << "*" << code_src->getLocal(narg++)->value(); } if (code_src->flags() & PycCode::CO_VARKEYWORDS) { if (narg) pyc_output << ", "; pyc_output << "**" << code_src->getLocal(narg++)->value(); } if (isLambda) { pyc_output << ": "; } else { pyc_output << "):\n"; printDocstringAndGlobals = true; } bool preLambda = inLambda; inLambda |= isLambda; print_src(code, mod, pyc_output); inLambda = preLambda; } else if (src.type() == ASTNode::NODE_CLASS) { pyc_output << "\n"; start_line(cur_indent, pyc_output); pyc_output << "class "; print_src(dest, mod, pyc_output); PycRef bases = src.cast()->bases().cast(); if (bases->values().size() > 0) { pyc_output << "("; bool first = true; for (const auto& val : bases->values()) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } pyc_output << "):\n"; } else { // Don't put parens if there are no base classes pyc_output << ":\n"; } printClassDocstring = true; PycRef code = src.cast()->code().cast() ->func().cast()->code(); print_src(code, mod, pyc_output); } else if (src.type() == ASTNode::NODE_IMPORT) { PycRef import = src.cast(); if (import->fromlist() != NULL) { PycRef fromlist = import->fromlist().cast()->object(); if (fromlist != Pyc_None) { pyc_output << "from "; if (import->name().type() == ASTNode::NODE_IMPORT) print_src(import->name().cast()->name(), mod, pyc_output); else print_src(import->name(), mod, pyc_output); pyc_output << " import "; if (fromlist.type() == PycObject::TYPE_TUPLE || fromlist.type() == PycObject::TYPE_SMALL_TUPLE) { bool first = true; for (const auto& val : fromlist.cast()->values()) { if (!first) pyc_output << ", "; pyc_output << val.cast()->value(); first = false; } } else { pyc_output << fromlist.cast()->value(); } } else { pyc_output << "import "; print_src(import->name(), mod, pyc_output); } } else { pyc_output << "import "; PycRef import_name = import->name(); print_src(import_name, mod, pyc_output); if (!dest.cast()->name()->isEqual(import_name.cast()->name().cast())) { pyc_output << " as "; print_src(dest, mod, pyc_output); } } } else if (src.type() == ASTNode::NODE_BINARY && src.cast()->is_inplace()) { print_src(src, mod, pyc_output); } else { print_src(dest, mod, pyc_output); pyc_output << " = "; print_src(src, mod, pyc_output); } } break; case ASTNode::NODE_CHAINSTORE: { for (auto& dest : node.cast()->nodes()) { print_src(dest, mod, pyc_output); pyc_output << " = "; } print_src(node.cast()->src(), mod, pyc_output); } break; case ASTNode::NODE_SUBSCR: { print_src(node.cast()->name(), mod, pyc_output); pyc_output << "["; print_src(node.cast()->key(), mod, pyc_output); pyc_output << "]"; } break; case ASTNode::NODE_CONVERT: { pyc_output << "`"; print_src(node.cast()->name(), mod, pyc_output); pyc_output << "`"; } break; case ASTNode::NODE_TUPLE: { PycRef tuple = node.cast(); ASTTuple::value_t values = tuple->values(); if (tuple->requireParens()) pyc_output << "("; bool first = true; for (const auto& val : values) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } if (values.size() == 1) pyc_output << ','; if (tuple->requireParens()) pyc_output << ')'; } break; case ASTNode::NODE_ANNOTATED_VAR: { PycRef annotated_var = node.cast(); PycRef name = annotated_var->name().cast(); PycRef annotation = annotated_var->annotation(); pyc_output << name->object().cast()->value(); pyc_output << ": "; print_src(annotation, mod, pyc_output); } break; case ASTNode::NODE_TERNARY: { /* parenthesis might be needed * * when if-expr is part of numerical expression, ternary has the LOWEST precedence * print(a + b if False else c) * output is c, not a+c (a+b is calculated first) * * but, let's not add parenthesis - to keep the source as close to original as possible in most cases */ PycRef ternary = node.cast(); //pyc_output << "("; print_src(ternary->if_expr(), mod, pyc_output); const auto if_block = ternary->if_block().cast(); pyc_output << " if "; if (if_block->negative()) pyc_output << "not "; print_src(if_block->cond(), mod, pyc_output); pyc_output << " else "; print_src(ternary->else_expr(), mod, pyc_output); //pyc_output << ")"; } break; default: pyc_output << "type() << ">"; fprintf(stderr, "Unsupported Node type: %d\n", node->type()); cleanBuild = false; return; } cleanBuild = true; } bool print_docstring(PycRef obj, int indent, PycModule* mod, std::ostream& pyc_output) { // docstrings are translated from the bytecode __doc__ = 'string' to simply '''string''' signed char prefix = -1; switch (obj.type()) { case PycObject::TYPE_STRING: prefix = mod->strIsUnicode() ? 'b' : 0; break; case PycObject::TYPE_UNICODE: prefix = mod->strIsUnicode() ? 0 : 'u'; break; case PycObject::TYPE_INTERNED: case PycObject::TYPE_ASCII: case PycObject::TYPE_ASCII_INTERNED: case PycObject::TYPE_SHORT_ASCII: case PycObject::TYPE_SHORT_ASCII_INTERNED: if (mod->majorVer() >= 3) prefix = 0; else prefix = mod->strIsUnicode() ? 'b' : 0; break; } if (prefix != -1) { start_line(indent, pyc_output); OutputString(obj.cast(), prefix, true, pyc_output); pyc_output << "\n"; return true; } else return false; } void decompyle(PycRef code, PycModule* mod, std::ostream& pyc_output) { PycRef source = BuildFromCode(code, mod); PycRef clean = source.cast(); if (cleanBuild) { // The Python compiler adds some stuff that we don't really care // about, and would add extra code for re-compilation anyway. // We strip these lines out here, and then add a "pass" statement // if the cleaned up code is empty if (clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->src().type() == ASTNode::NODE_NAME && store->dest().type() == ASTNode::NODE_NAME) { PycRef src = store->src().cast(); PycRef dest = store->dest().cast(); if (src->name()->isEqual("__name__") && dest->name()->isEqual("__module__")) { // __module__ = __name__ // Automatically added by Python 2.2.1 and later clean->removeFirst(); } } } if (clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->src().type() == ASTNode::NODE_OBJECT && store->dest().type() == ASTNode::NODE_NAME) { PycRef src = store->src().cast(); PycRef srcString = src->object().try_cast(); PycRef dest = store->dest().cast(); if (srcString != nullptr && srcString->isEqual(code->name().cast()) && dest->name()->isEqual("__qualname__")) { // __qualname__ = '' // Automatically added by Python 3.3 and later clean->removeFirst(); } } } // Class and module docstrings may only appear at the beginning of their source if (printClassDocstring && clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->dest().type() == ASTNode::NODE_NAME && store->dest().cast()->name()->isEqual("__doc__") && store->src().type() == ASTNode::NODE_OBJECT) { if (print_docstring(store->src().cast()->object(), cur_indent + (code->name()->isEqual("") ? 0 : 1), mod, pyc_output)) clean->removeFirst(); } } if (clean->nodes().back().type() == ASTNode::NODE_RETURN) { PycRef ret = clean->nodes().back().cast(); if (ret->value() == NULL || ret->value().type() == ASTNode::NODE_LOCALS) { clean->removeLast(); // Always an extraneous return statement } } } if (printClassDocstring) printClassDocstring = false; // This is outside the clean check so a source block will always // be compilable, even if decompylation failed. if (clean->nodes().size() == 0 && !code.isIdent(mod->code())) clean->append(new ASTKeyword(ASTKeyword::KW_PASS)); bool part1clean = cleanBuild; if (printDocstringAndGlobals) { if (code->consts()->size()) print_docstring(code->getConst(0), cur_indent + 1, mod, pyc_output); PycCode::globals_t globs = code->getGlobals(); if (globs.size()) { start_line(cur_indent + 1, pyc_output); pyc_output << "global "; bool first = true; for (const auto& glob : globs) { if (!first) pyc_output << ", "; pyc_output << glob->value(); first = false; } pyc_output << "\n"; } printDocstringAndGlobals = false; } print_src(source, mod, pyc_output); if (!cleanBuild || !part1clean) { start_line(cur_indent, pyc_output); pyc_output << "# WARNING: Decompyle incomplete\n"; } }