fix: friendly message for upx (gh-12, gh-35)

Merge pull request #32 from lweipanw/main
fix(detect & shot): not necessarily starts with b"PY00" e.g. PY01xxx
2026-02-23 21:03:20 +08:00 · 2026-01-31 01:39:45 +08:00 · 2026-01-31 01:30:43 +08:00 · 2025-12-17 23:49:38 +08:00 · 2025-11-20 17:48:56 +08:00 · 2025-11-20 16:06:53 +08:00
6 changed files with 95 additions and 84 deletions
--- a/.github/workflows/regtest.yml
+++ b/.github/workflows/regtest.yml
@@ -3,7 +3,7 @@
 name: Regression Test
 on:
  push:
-    branches: [main]
+    branches: [main, update-tests]

 jobs:
  regtest:
@@ -39,17 +39,28 @@ jobs:
          lfs: true
          submodules: true

-      - name: Run shot.py
+      - name: Run shot.py and check for changes
+        working-directory: ./regtest
        run: |
-          touch ./regtest/.git/.no1shot
+          find . -type f -name '*\.1shot\.*' -exec rm -f {} + >/dev/null 2>&1
+          touch ./.git/.no1shot
          pip install pycryptodome
-          python3 ./project/oneshot/shot.py ./regtest --no-banner >/dev/null 2>&1
+          python3 ../project/oneshot/shot.py . --no-banner >/dev/null 2>&1
+          if [ -n "$(git status --porcelain)" ]; then
+            cd ../project
+            COMMIT_MSG=$(git log -1 --pretty=%B | head -n 1)
+            echo "$COMMIT_MSG" | grep -q "update tests" || { echo "Workspace changed but commit message does not contain 'update tests'. Failing workflow."; exit 1; }
+          fi

      - name: Commit and push changes
+        if: ${{
+          steps.commit_message.outputs.message &&
+          (hashFiles('regtest/**') != '') &&
+          (contains(steps.commit_message.outputs.message, 'update tests'))
+          }}
        uses: EndBug/add-and-commit@a94899bca583c204427a224a7af87c02f9b325d5 # v9.1.4
        with:
          cwd: ./regtest
          add: .
          default_author: github_actions
          message: ${{ steps.commit_message.outputs.message }}
-          commit: "--allow-empty"
--- a/oneshot/detect.py
+++ b/oneshot/detect.py
@@ -3,21 +3,24 @@ import os
 from typing import List, Tuple, Union


+from util import dword
+
+
 def ascii_ratio(data: bytes) -> float:
    return sum(32 <= c < 127 for c in data) / len(data)


+def valid_bytes(data: bytes) -> bool:
+    return len(data) > 64 and all(0x30 <= b <= 0x39 for b in data[2:8]) and data[9] == 3
+
+
 def source_as_file(file_path: str) -> Union[List[bytes], None]:
    try:
        with open(file_path, "r") as f:
            co = compile(f.read(), "<str>", "exec")
-            data = [
-                i
-                for i in co.co_consts
-                if type(i) is bytes and i.startswith(b"PY00") and len(i) > 64
-            ]
+            data = [i for i in co.co_consts if type(i) is bytes and valid_bytes(i)]
            return data
-    except:
+    except Exception:
        return None


@@ -29,56 +32,50 @@ def source_as_lines(file_path: str) -> Union[List[bytes], None]:
                try:
                    co = compile(line, "<str>", "exec")
                    data.extend(
-                        [
-                            i
-                            for i in co.co_consts
-                            if type(i) is bytes
-                            and i.startswith(b"PY00")
-                            and len(i) > 64
-                        ]
+                        [i for i in co.co_consts if type(i) is bytes and valid_bytes(i)]
                    )
-                except:
+                except Exception:
                    # ignore not compilable lines
                    pass
-    except:
+    except Exception:
        return None
    return data


+# XXX: use bytes view instead of copying slices
+
+
 def find_data_from_bytes(data: bytes, max_count=-1) -> List[bytes]:
    result = []
    idx = 0
    while len(result) != max_count:
-        idx = data.find(b"PY00")
+        idx = data.find(b"PY0")  # XXX: not necessarily starts with b"PY"
        if idx == -1:
            break
        data = data[idx:]
        if len(data) < 64:
+            # only stop when fewer than 64 bytes remain; a longer false match is skipped below instead, since a real one may follow (PY0blahPY0)
            break
-        header_len = int.from_bytes(data[28:32], "little")
-        body_len = int.from_bytes(data[32:36], "little")
+        header_len = dword(data, 28)
+        body_len = dword(data, 32)
        if header_len > 256 or body_len > 0xFFFFF or header_len + body_len > len(data):
            # compressed or coincident, skip
-            data = data[5:]
+            data = data[4:]
            continue

        complete_object_length = header_len + body_len

        # maybe followed by data for other Python versions or another part of BCC
-        next_segment_offset = int.from_bytes(data[56:60], "little")
+        next_segment_offset = dword(data, 56)
        data_next = data[next_segment_offset:]
-        while (
-            next_segment_offset != 0
-            and data_next.startswith(b"PY00")
-            and len(data_next) >= 64
-        ):
-            header_len = int.from_bytes(data_next[28:32], "little")
-            body_len = int.from_bytes(data_next[32:36], "little")
+        while next_segment_offset != 0 and valid_bytes(data_next):
+            header_len = dword(data_next, 28)
+            body_len = dword(data_next, 32)
            complete_object_length = next_segment_offset + header_len + body_len

-            if int.from_bytes(data_next[56:60], "little") == 0:
+            if dword(data_next, 56) == 0:
                break
-            next_segment_offset += int.from_bytes(data_next[56:60], "little")
+            next_segment_offset += dword(data_next, 56)
            data_next = data[next_segment_offset:]

        result.append(data[:complete_object_length])
@@ -89,21 +86,23 @@ def find_data_from_bytes(data: bytes, max_count=-1) -> List[bytes]:
 def nuitka_package(
    head: bytes, relative_path: str
 ) -> Union[List[Tuple[str, bytes]], None]:
-    first_occurrence = head.find(b"PY00")
+    first_occurrence = head.find(b"PY0")
    if first_occurrence == -1:
        return None
    last_dot_bytecode = head.rfind(b".bytecode\x00", 0, first_occurrence)
    if last_dot_bytecode == -1:
        return None
-    length = int.from_bytes(head[last_dot_bytecode - 4 : last_dot_bytecode], "little")
+    length = dword(head, last_dot_bytecode - 4)
    end = last_dot_bytecode + length
    cur = last_dot_bytecode
    result = []
    while cur < end:
        module_name_len = head.find(b"\x00", cur, end) - cur
-        module_name = head[cur : cur + module_name_len].decode("utf-8")
+        module_name = head[cur : cur + module_name_len].decode(
+            "utf-8", errors="replace"
+        )
        cur += module_name_len + 1
-        module_len = int.from_bytes(head[cur : cur + 4], "little")
+        module_len = dword(head, cur)
        cur += 4
        module_data = find_data_from_bytes(head[cur : cur + module_len], 1)
        if module_data:
@@ -135,7 +134,7 @@ def detect_process(
    try:
        with open(file_path, "rb") as f:
            head = f.read(16 * 1024 * 1024)
-    except:
+    except Exception:
        logger.error(f"Failed to read file: {relative_path}")
        return None

--- a/oneshot/runtime.py
+++ b/oneshot/runtime.py
@@ -37,7 +37,7 @@ class RuntimeInfo:
            # TODO: implement for other platforms
            self.extract_info_win64()

-        self.serial_number = self.part_1[12:18].decode()
+        self.serial_number = self.part_1[12:18].decode("utf-8", errors="replace")
        self.runtime_aes_key = self.calc_aes_key()

    def __str__(self) -> str:
@@ -66,9 +66,21 @@ class RuntimeInfo:
        """
        with open(self.file_path, "rb") as f:
            data = f.read(16 * 1024 * 1024)
-        cur = data.index(b"pyarmor-vax")
+        cur = data.find(b"pyarmor-vax")
+        if cur == -1:
+            # Special case: check for a UPX-packed file (GH-12, GH-35)
+            if data.find(b"UPX!") != -1 and data.find(b"UPX0") != -1:
+                logger.error(
+                    f"{self.file_path} seems to be packed by UPX. Before it can be processed, you need to unpack it first: Download UPX from https://github.com/upx/upx, and run `upx -d {self.file_path}` (you may need to escape the file path) in the command line."
+                )
+            else:
+                logger.error(
+                    f"{self.file_path} does not contain 'pyarmor-vax'. Maybe it's packed, obfuscated, or generated by an unsupported version of Pyarmor."
+                )
+            raise ValueError(f"{self.file_path} does not contain 'pyarmor-vax'")

        if data[cur + 11 : cur + 18] == b"\x00" * 7:
+            # Do not log. Skip this file silently and find another.
            raise ValueError(f"{self.file_path} is a runtime template")

        # Align with pyd file and executable address:
@@ -78,7 +90,7 @@ class RuntimeInfo:

        if data[0x5C] & 1 != 0:
            logger.error(
-                'External key file ".pyarmor.ikey" is not supported yet, but it will be supported once we get a sample (like this one). Please open an issue on https://github.com/Lil-House/Pyarmor-Static-Unpack-1shot/issues to make this tool stronger.'
+                f'External key file ".pyarmor.ikey" is not supported yet, but it will be supported once we get a sample (like this one). Please open an issue on https://github.com/Lil-House/Pyarmor-Static-Unpack-1shot/issues to make this tool stronger. ({self.file_path})'
            )
            raise NotImplementedError(f'{self.file_path} uses ".pyarmor.ikey"')

--- a/oneshot/shot.py
+++ b/oneshot/shot.py
@@ -24,6 +24,7 @@ except ImportError:

 from detect import detect_process
 from runtime import RuntimeInfo
+from util import dword, bytes_sub


 # Initialize colorama
@@ -169,7 +170,7 @@ async def decrypt_process_async(
    async def process_file(relative_path, data):
        async with semaphore:
            try:
-                serial_number = data[2:8].decode("utf-8")
+                serial_number = data[2:8].decode("utf-8", errors="replace")
                runtime = runtimes[serial_number]
                logger.info(
                    f"{Fore.CYAN}Decrypting: {serial_number} ({relative_path}){Style.RESET_ALL}"
@@ -189,18 +190,16 @@ async def decrypt_process_async(
                        f.write(data)

                # Check BCC; mutates "data"
-                if int.from_bytes(data[20:24], "little") == 9:
-                    cipher_text_offset = int.from_bytes(data[28:32], "little")
-                    cipher_text_length = int.from_bytes(data[32:36], "little")
+                if dword(data, 20) == 9:
+                    cipher_text_offset = dword(data, 28)
+                    cipher_text_length = dword(data, 32)
                    nonce = data[36:40] + data[44:52]
                    bcc_aes_decrypted = general_aes_ctr_decrypt(
-                        data[
-                            cipher_text_offset : cipher_text_offset + cipher_text_length
-                        ],
+                        bytes_sub(data, cipher_text_offset, cipher_text_length),
                        runtime.runtime_aes_key,
                        nonce,
                    )
-                    data = data[int.from_bytes(data[56:60], "little") :]
+                    data = data[dword(data, 56) :]
                    bcc_architecture_mapping = {
                        0x2001: "win-x64",
                        0x2003: "linux-x64",
@@ -208,28 +207,21 @@ async def decrypt_process_async(
                    while True:
                        if len(bcc_aes_decrypted) < 16:
                            break
-                        bcc_segment_offset = int.from_bytes(
-                            bcc_aes_decrypted[0:4], "little"
-                        )
-                        bcc_segment_length = int.from_bytes(
-                            bcc_aes_decrypted[4:8], "little"
-                        )
-                        bcc_architecture_id = int.from_bytes(
-                            bcc_aes_decrypted[8:12], "little"
-                        )
-                        bcc_next_segment_offset = int.from_bytes(
-                            bcc_aes_decrypted[12:16], "little"
-                        )
+                        bcc_segment_offset = dword(bcc_aes_decrypted, 0)
+                        bcc_segment_length = dword(bcc_aes_decrypted, 4)
+                        bcc_architecture_id = dword(bcc_aes_decrypted, 8)
+                        bcc_next_segment_offset = dword(bcc_aes_decrypted, 12)
                        bcc_architecture = bcc_architecture_mapping.get(
                            bcc_architecture_id, f"0x{bcc_architecture_id:x}"
                        )
                        bcc_file_path = f"{dest_path}.1shot.bcc.{bcc_architecture}.so"
                        with open(bcc_file_path, "wb") as f:
                            f.write(
-                                bcc_aes_decrypted[
-                                    bcc_segment_offset : bcc_segment_offset
-                                    + bcc_segment_length
-                                ]
+                                bytes_sub(
+                                    bcc_aes_decrypted,
+                                    bcc_segment_offset,
+                                    bcc_segment_length,
+                                )
                            )
                        logger.info(
                            f"{Fore.GREEN}Extracted BCC mode native part: {bcc_file_path}{Style.RESET_ALL}"
@@ -238,8 +230,8 @@ async def decrypt_process_async(
                            break
                        bcc_aes_decrypted = bcc_aes_decrypted[bcc_next_segment_offset:]

-                cipher_text_offset = int.from_bytes(data[28:32], "little")
-                cipher_text_length = int.from_bytes(data[32:36], "little")
+                cipher_text_offset = dword(data, 28)
+                cipher_text_length = dword(data, 32)
                nonce = data[36:40] + data[44:52]
                seq_file_path = dest_path + ".1shot.seq"
                with open(seq_file_path, "wb") as f:
@@ -249,10 +241,7 @@ async def decrypt_process_async(
                    f.write(data[:cipher_text_offset])
                    f.write(
                        general_aes_ctr_decrypt(
-                            data[
-                                cipher_text_offset : cipher_text_offset
-                                + cipher_text_length
-                            ],
+                            bytes_sub(data, cipher_text_offset, cipher_text_length),
                            runtime.runtime_aes_key,
                            nonce,
                        )
@@ -412,16 +401,16 @@ def main():

    if not args.no_banner:
        print(rf"""{Fore.CYAN}
- ____                                                                     ____ 
+ ____                                                                     ____
 ( __ )                                                                   ( __ )
- |  |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|  | 
- |  |   ____                                      _ ___  _          _     |  | 
- |  |  |  _ \ _  _  __ _ _ __ _ _ __   ___  _ _  / / __|| |_   ___ | |_   |  | 
- |  |  | |_) | || |/ _` | '__| ' `  \ / _ \| '_| | \__ \| ' \ / _ \| __|  |  | 
- |  |  |  __/| || | (_| | |  | || || | (_) | |   | |__) | || | (_) | |_   |  | 
- |  |  |_|    \_, |\__,_|_|  |_||_||_|\___/|_|   |_|___/|_||_|\___/ \__|  |  | 
- |  |         |__/                                                        |  | 
- |__|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|__| 
+ |  |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|  |
+ |  |   ____                                      _ ___  _          _     |  |
+ |  |  |  _ \ _  _  __ _ _ __ _ _ __   ___  _ _  / / __|| |_   ___ | |_   |  |
+ |  |  | |_) | || |/ _` | '__| ' `  \ / _ \| '_| | \__ \| ' \ / _ \| __|  |  |
+ |  |  |  __/| || | (_| | |  | || || | (_) | |   | |__) | || | (_) | |_   |  |
+ |  |  |_|    \_, |\__,_|_|  |_||_||_|\___/|_|   |_|___/|_||_|\___/ \__|  |  |
+ |  |         |__/                                                        |  |
+ |__|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|__|
 (____)                                                        v0.2.2     (____)

              For technology exchange only. Use at your own risk.
@@ -493,7 +482,7 @@ def main():
            if file_name.endswith(".pyz"):
                with open(file_path, "rb") as f:
                    head = f.read(16 * 1024 * 1024)
-                if b"PY00" in head and (
+                if b"PY0" in head and (
                    not os.path.exists(file_path + "_extracted")
                    or len(os.listdir(file_path + "_extracted")) == 0
                ):
--- a/oneshot/util.py
+++ b/oneshot/util.py
@@ -2,5 +2,5 @@ def dword(buffer, idx: int) -> int:
    return int.from_bytes(buffer[idx : idx + 4], "little")


-def bytes_sub(buffer, start: int, length: int) -> int:
+def bytes_sub(buffer, start: int, length: int) -> bytes:
    return buffer[start : start + length]
--- a/pycdc/pyc_code.cpp
+++ b/pycdc/pyc_code.cpp
@@ -216,7 +216,7 @@ void PycCode::pyarmorDecryptCoCode(unsigned long consts_index, PycModule *mod)
        // Assume tail of code is not used there
        memset(
            &code_bytes[desc->decrypt_length],
-            9, // NOP
+            mod->verCompare(3, 13) == 0 ? 30 : mod->verCompare(3, 14) == 0 ? 27 : 9, // NOP
            desc->decrypt_begin_index);
    }
Author	SHA1	Message	Date
Lil-Ran	8fba94269c	fix: friendly message for upx (gh-12, gh-35)	2026-02-23 21:03:20 +08:00
Shunran Lei	fc4e96a164	Merge pull request #32 from lweipanw/main fix(detect & shot): not necessarily starts with b"PY00" e.g. PY01xxx	2026-01-31 01:39:45 +08:00
Lee Wei	0658f455f7	fix(detect & shot): not necessarily starts with b"PY00" e.g. PY01xxx	2026-01-31 01:30:43 +08:00
Lil-Ran	87b1c7a86e	fix: NOP is not 9 if minor version >= 13	2025-12-17 23:49:38 +08:00
Lil-Ran	3c104c02ac	ci(regtest): ensure commit message contains 'update' before pushing changes	2025-11-20 17:48:56 +08:00
Lil-Ran	d9a5dee5aa	refactor(scripts): replace int.from_bytes with dword	2025-11-20 16:06:53 +08:00
Lil-Ran	63b6228d00	fix(detect): not necessarily starts with b"PY"	2025-11-20 15:49:20 +08:00