From aaad8684452126f90a3a3fe4f0be9d0ac7bbf65a Mon Sep 17 00:00:00 2001 From: Akash Munagala Date: Sun, 12 Feb 2023 15:56:18 -0800 Subject: [PATCH 1/5] Implement BUILD_SET and SET_UPDATE --- ASTNode.h | 14 +++++++++++++- ASTree.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/ASTNode.h b/ASTNode.h index 3a30814..8ac6e1c 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -12,7 +12,7 @@ public: NODE_INVALID, NODE_NODELIST, NODE_OBJECT, NODE_UNARY, NODE_BINARY, NODE_COMPARE, NODE_SLICE, NODE_STORE, NODE_RETURN, NODE_NAME, NODE_DELETE, NODE_FUNCTION, NODE_CLASS, NODE_CALL, NODE_IMPORT, - NODE_TUPLE, NODE_LIST, NODE_MAP, NODE_SUBSCR, NODE_PRINT, + NODE_TUPLE, NODE_LIST, NODE_SET, NODE_MAP, NODE_SUBSCR, NODE_PRINT, NODE_CONVERT, NODE_KEYWORD, NODE_RAISE, NODE_EXEC, NODE_BLOCK, NODE_COMPREHENSION, NODE_LOADBUILDCLASS, NODE_AWAITABLE, NODE_FORMATTEDVALUE, NODE_JOINEDSTR, NODE_CONST_MAP, @@ -358,6 +358,18 @@ private: value_t m_values; }; +class ASTSet : public ASTNode { +public: + typedef std::set> value_t; + + ASTSet(value_t values) + : ASTNode(NODE_SET), m_values(std::move(values)) { } + + const value_t& values() const { return m_values; } + +private: + value_t m_values; +}; class ASTMap : public ASTNode { public: diff --git a/ASTree.cpp b/ASTree.cpp index 1c283a1..a95b8ef 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -322,6 +322,16 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) stack.push(new ASTList(values)); } break; + case Pyc::BUILD_SET_A: + { + ASTSet::value_t values; + for (int i=0; iverCompare(3, 5) >= 0) { auto map = new ASTMap; @@ -1547,6 +1557,33 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) } } break; + case Pyc::SET_UPDATE_A: + { + PycRef rhs = stack.top(); + stack.pop(); + PycRef lhs = stack.top().cast(); + stack.pop(); + + if (rhs.type() != ASTNode::NODE_OBJECT) { + fprintf(stderr, "Unsupported argument found for SET_UPDATE\n"); + break; + } + + // I've only ever seen this be a TYPE_FROZENSET, but let's be careful... + PycRef obj = rhs.cast()->object(); + if (obj->type() != PycObject::TYPE_FROZENSET) { + fprintf(stderr, "Unsupported argument type found for SET_UPDATE\n"); + break; + } + + ASTSet::value_t result = lhs->values(); + for (const auto& it : obj.cast()->values()) { + result.insert(new ASTObject(it)); + } + + stack.push(new ASTSet(result)); + } + break; case Pyc::LIST_EXTEND_A: { PycRef rhs = stack.top(); @@ -2838,6 +2875,24 @@ void print_src(PycRef node, PycModule* mod) fputs("]", pyc_output); } break; + case ASTNode::NODE_SET: + { + fputs("{", pyc_output); + bool first = true; + cur_indent++; + for (const auto& val : node.cast()->values()) { + if (first) + fputs("\n", pyc_output); + else + fputs(",\n", pyc_output); + start_line(cur_indent); + print_src(val, mod); + first = false; + } + cur_indent--; + fputs("}", pyc_output); + } + break; case ASTNode::NODE_COMPREHENSION: { PycRef comp = node.cast(); From 66c46661d91db301459bffc1e3e4500c348c5e5f Mon Sep 17 00:00:00 2001 From: Akash Munagala Date: Sun, 12 Feb 2023 16:21:46 -0800 Subject: [PATCH 2/5] Add test --- tests/compiled/test_sets.3.10.pyc | Bin 0 -> 198 bytes tests/input/test_sets.py | 5 +++++ tests/tokenized/test_sets.txt | 4 ++++ 3 files changed, 9 insertions(+) create mode 100644 tests/compiled/test_sets.3.10.pyc create mode 100644 tests/input/test_sets.py create mode 100644 tests/tokenized/test_sets.txt diff --git a/tests/compiled/test_sets.3.10.pyc b/tests/compiled/test_sets.3.10.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0562b4e61a4fb40862f8af52bbbd3c6a58e080ef GIT binary patch literal 198 zcmd1j<>g`kf}rk~$ss`cF^GcivMF*&=~u>d(mAmJjA@JnV80TT1mWWB{)oLX{= zG4U2-(k;g1TZ}0y8H#v7DnZ0Ao|4q!l6atsV!eXOTO2mI`6;D2sdgZviaCG;8v_R; I2O|#?0D+MwMgRZ+ literal 0 HcmV?d00001 diff --git a/tests/input/test_sets.py b/tests/input/test_sets.py new file mode 100644 index 0000000..93f493a --- /dev/null +++ b/tests/input/test_sets.py @@ -0,0 +1,5 @@ +a = set() +b = {1, 2} +c = {"AB", "CD"} +# Below uses SET_UPDATE +d = {1, 2, 3, 4} diff --git a/tests/tokenized/test_sets.txt b/tests/tokenized/test_sets.txt new file mode 100644 index 0000000..c939d7d --- /dev/null +++ b/tests/tokenized/test_sets.txt @@ -0,0 +1,4 @@ +a = set ( ) +b = { 1 , 2 } +c = { 'AB' , 'CD' } +d = { 2 , 1 , 3 , 4 } From f00921d3d37e8cd3fc3da088028fa952538c34d0 Mon Sep 17 00:00:00 2001 From: Akash Munagala Date: Mon, 13 Feb 2023 19:25:45 -0800 Subject: [PATCH 3/5] Use deque instead of set --- ASTNode.h | 3 ++- ASTree.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ASTNode.h b/ASTNode.h index 8ac6e1c..8d5079a 100644 --- a/ASTNode.h +++ b/ASTNode.h @@ -3,6 +3,7 @@ #include "pyc_module.h" #include +#include /* Similar interface to PycObject, so PycRef can work on it... * * However, this does *NOT* mean the two are interchangeable! */ @@ -360,7 +361,7 @@ private: class ASTSet : public ASTNode { public: - typedef std::set> value_t; + typedef std::deque> value_t; ASTSet(value_t values) : ASTNode(NODE_SET), m_values(std::move(values)) { } diff --git a/ASTree.cpp b/ASTree.cpp index a95b8ef..7af32d9 100644 --- a/ASTree.cpp +++ b/ASTree.cpp @@ -326,7 +326,7 @@ PycRef BuildFromCode(PycRef code, PycModule* mod) { ASTSet::value_t values; for (int i=0; i BuildFromCode(PycRef code, PycModule* mod) ASTSet::value_t result = lhs->values(); for (const auto& it : obj.cast()->values()) { - result.insert(new ASTObject(it)); + result.push_back(new ASTObject(it)); } stack.push(new ASTSet(result)); From 80e835e197c473a5c43ac6e67a4afbbafc4da49d Mon Sep 17 00:00:00 2001 From: Akash Munagala Date: Mon, 13 Feb 2023 19:26:30 -0800 Subject: [PATCH 4/5] Properly output frozenset object in pycdas --- pycdas.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pycdas.cpp b/pycdas.cpp index 2be7a4f..cdb6bf0 100644 --- a/pycdas.cpp +++ b/pycdas.cpp @@ -211,6 +211,14 @@ void output_object(PycRef obj, PycModule* mod, int indent, iputs(indent, "}\n"); } break; + case PycObject::TYPE_FROZENSET: + { + iputs(indent, "frozenset({\n"); + for (const auto& val : obj.cast()->values()) + output_object(val, mod, indent + 1, flags); + iputs(indent, "})\n"); + } + break; case PycObject::TYPE_NONE: iputs(indent, "None\n"); break; From ca73f058306ab817102194baac6f3cf5bce785b9 Mon Sep 17 00:00:00 2001 From: Akash Munagala Date: Mon, 13 Feb 2023 19:33:25 -0800 Subject: [PATCH 5/5] Print out frozenset in disasm too --- bytecode.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/bytecode.cpp b/bytecode.cpp index 607ddf7..b96c33d 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -257,6 +257,21 @@ void print_const(PycRef obj, PycModule* mod, const char* parent_f_str fputs("}", pyc_output); } break; + case PycObject::TYPE_FROZENSET: + { + fputs("frozenset({", pyc_output); + PycSet::value_t values = obj.cast()->values(); + auto it = values.cbegin(); + if (it != values.cend()) { + print_const(*it, mod); + while (++it != values.cend()) { + fputs(", ", pyc_output); + print_const(*it, mod); + } + } + fputs("})", pyc_output); + } + break; case PycObject::TYPE_NONE: fputs("None", pyc_output); break; @@ -312,6 +327,8 @@ void print_const(PycRef obj, PycModule* mod, const char* parent_f_str case PycObject::TYPE_CODE2: fprintf(pyc_output, " %s", obj.cast()->name()->value()); break; + default: + fprintf(pyc_output, "\n", obj->type()); } }