4 Commits

Author SHA1 Message Date
e7397945d3 feat: file format detection, single file mode 2025-03-25 23:47:21 +08:00
48cab893c9 ci: build workflow 2025-03-15 23:26:59 +08:00
64df67ac8a fix: temp patch for robustness 2025-03-15 22:35:38 +08:00
d9e00c3762 docs: update readme 2025-03-07 11:08:08 +08:00
9 changed files with 294 additions and 173 deletions

55
.github/workflows/build.yml vendored Normal file
View File

@@ -0,0 +1,55 @@
name: Build
on:
push:
branches: [ci-build]
pull_request:
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- name: Build
run: |
mkdir build
cd build
cmake ..
cmake --build . --config Debug
cmake --install .
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: pyarmor-1shot-build-${{ matrix.os }}
path: |
helpers
README.md
README-Decompyle++.markdown
LICENSE
windows-build:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
- name: Build
run: |
mkdir build
cd build
cmake -G "MinGW Makefiles" ..
cmake --build . --config Debug
cmake --install .
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: pyarmor-1shot-build-windows
path: |
helpers
README.md
README-Decompyle++.markdown
LICENSE

View File

@@ -1,67 +0,0 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
name: "CodeQL"
on:
push:
branches: [master]
pull_request:
# The branches below must be a subset of the branches above
branches: [master]
schedule:
- cron: '0 1 * * 2'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: ['cpp', 'python']
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Use only 'java' to analyze code written in Java, Kotlin or both
# Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
steps:
- name: Checkout repository
uses: actions/checkout@v3
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
# queries: security-extended,security-and-quality
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- if: matrix.language == 'python'
name: Autobuild Python
uses: github/codeql-action/autobuild@v2
- if: matrix.language == 'cpp'
name: Build C++
run: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
make
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{matrix.language}}"

View File

@@ -1,29 +0,0 @@
name: Linux-CI
on:
push:
branches: [master]
pull_request:
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Configure and Build
run: |
(
mkdir build-debug && cd build-debug
cmake -DCMAKE_BUILD_TYPE=Debug ..
make -j4
)
(
mkdir build-release && cd build-release
cmake -DCMAKE_BUILD_TYPE=Debug ..
make -j4
)
- name: Test
run: |
cmake --build build-debug --target check
cmake --build build-release --target check

View File

@@ -1,29 +0,0 @@
name: MSVC-CI
on:
push:
branches: [master]
pull_request:
jobs:
build:
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
- name: Configure and Build
run: |
mkdir build
cd build
cmake -G "Visual Studio 17 2022" -A Win32 ..
cmake --build . --config Debug
cmake --build . --config Release
- name: Test
run: |
cmake --build build --config Debug --target check
cmake --build build --config Release --target check
- name: Upload artifact
uses: actions/upload-artifact@v3
with:
name: pycdc-release
path: build\Release\*.exe

View File

@@ -1873,6 +1873,13 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
mod->verCompare(3, 11) >= 0 ? code->qualName()->value() : code->name()->value());
}
}
// BEGIN ONESHOT TEMPORARY PATCH
// The root cause is not here, but in the way the blocks are created.
// Break out early to avoid a segfault until that is fixed properly.
if (blocks.top() == defblock)
break;
// END ONESHOT PATCH
PycRef<ASTBlock> tmp = curblock;
blocks.pop();

View File

@@ -1,12 +1,32 @@
# Pyarmor-Static-Unpack-1shot
# Pyarmor Static Unpack One-Shot Tool
Generally this project aims to statically convert (without executing) armored data - which can be regarded as an encrypted variant of pyc files - back to disassembly and (experimentally) source code. Therefore we forked the awesome [Decompyle++](https://github.com/zrax/pycdc) (aka pycdc).
[Pyarmor](https://github.com/dashingsoft/pyarmor) is a popular tool to protect Python source code. It turns Python scripts into binary data, which can be regarded as an encrypted variant of pyc files. They can be decrypted by a shared library (pyarmor_runtime) and then executed by Python interpreter.
Currently we are trying to support Pyarmor 8.0 - latest (9.1.1), Python 3.7 - 3.13, platforms covering Windows, Linux, macOS, and Android, with obfuscating options as many as possible. (However, we only have limited tests.)
This project aims to convert armored data back to bytecode assembly and (experimentally) source code. We forked the awesome [Decompyle++](https://github.com/zrax/pycdc) (aka pycdc) and added extra processing on top of it, such as modifying the abstract syntax tree.
If the data starts with `PY` followed by six digits, it is supported. Otherwise, if it starts with `PYARMOR`, it is generated by Pyarmor 7 or before, and is not supported.
> [!WARNING]
>
> **Disassembly results are accurate, but decompiled code can be incomplete and incorrect.** [See issue #3](https://github.com/Lil-House/Pyarmor-Static-Unpack-1shot/issues/3)
We cannot wait to make it public. Detailed write-up will be available soon. For those who are curious, temporarily you can check out [the similar work of G DATA Advanced Analytics](https://cyber.wtf/2025/02/12/unpacking-pyarmor-v8-scripts/).
## Features
### Static
You don't need to execute the encrypted scripts. We decrypt them using the same algorithm as pyarmor_runtime. This is useful when the scripts cannot be trusted.
### Universal
Currently we are trying to support Pyarmor 8.0 to 9.1.2 (latest) and Python 3.7 - 3.13 on all operating systems, with as many obfuscation options as possible. (However, we only have limited test coverage.)
You can run this tool in any environment; it does not need to match the one used by the obfuscated scripts or the runtime.
> [!NOTE]
>
> If the data starts with `PY` followed by six digits, it is supported. Otherwise, if it starts with `PYARMOR`, it is generated by Pyarmor 7 or earlier, and is not supported.
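For a quick manual check, a sketch along these lines (illustrative, not shipped with this tool) tells the two cases apart:

``` python
import re
import sys

def classify(path: str) -> str:
    """Peek at the first bytes and report which Pyarmor generation they belong to."""
    with open(path, 'rb') as f:
        head = f.read(16)
    if re.match(rb'PY\d{6}', head):
        return 'Pyarmor 8+ armored data (supported)'
    if head.startswith(b'PYARMOR'):
        return 'Pyarmor 7 or earlier (not supported)'
    return 'not recognized as armored data'

if __name__ == '__main__':
    print(classify(sys.argv[1]))
```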
### Easy to use
The only thing you need to do is specify where your obfuscated scripts are. The tool does everything else: detecting armored data, parsing, disassembling, and decompiling. See the "Usage" section below.
## Build
@@ -18,23 +38,25 @@ cmake --build .
cmake --install .
```
You can also download prebuilt binaries from the [releases page](https://github.com/Lil-House/Pyarmor-Static-Unpack-1shot/releases).
## Usage
Make sure the executable `pyarmor-1shot` (`pyarmor-1shot.exe` on Windows) exists in `helpers` directory, and run `helpers/shot.py` in Python 3 (no need to use the same version with obfuscated scripts) with the "root" directory of obfuscated scripts. It will recursively find and handle `pyarmor_runtime` and as much armored data as possible. For example:
``` bash
$ ls /path/to/scripts
__pycache__ pyarmor_runtime_000000 obf_main.py plain_src.py util.pyc packed.so folder_with_other_scripts readme.unrelated
$ python /path/to/helpers/shot.py /path/to/scripts
python /path/to/helpers/shot.py /path/to/scripts
```
Before running `shot.py`, make sure the executable `pyarmor-1shot` (`pyarmor-1shot.exe` on Windows) exists in the `helpers` directory.
You only need to specify the directory that contains all armored data and `pyarmor_runtime`. The tool recursively finds and handles as much of it as possible.
When necessary, specify a `pyarmor_runtime` executable with `-r path/to/pyarmor_runtime[.pyd|.so|.dylib]`.
All files generated by this tool have `.1shot.` in their names. If you want to save them in another directory instead of in place, use `-o another/path/`. The folder structure will remain unchanged.
Note:
- Subdirectories called `__pycache__` or `site-packages` will not be touched, and symbolic links will not be followed, to avoid repeat or forever loop and save time. If you really need them, run the script later in these directories (as "root" directory) and specify the runtime.
- Subdirectories will not be touched if the folder name is exactly `__pycache__` or `site-packages`, or if the folder directly contains a file named `.no1shot`; symbolic links are never followed. This avoids duplicate work and infinite loops, and saves time. If you really need them, run the script later in these directories (as the "root" directory) and specify the runtime; see the sketch after this list.
- Archives, executables generated by PyInstaller, and the like must be unpacked by other tools before decryption; otherwise you will encounter undefined behavior.
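The traversal rules above correspond roughly to this sketch (simplified from `helpers/shot.py`):

``` python
import os

def iter_candidate_files(root: str):
    """Yield files to inspect, honoring the skip rules described above."""
    for dir_path, dirs, files in os.walk(root, followlinks=False):
        if '.no1shot' in files:
            dirs.clear()  # prune this whole subtree
            continue
        for skipped in ('__pycache__', 'site-packages'):
            if skipped in dirs:
                dirs.remove(skipped)
        for name in files:
            if '.1shot.' in name:  # ignore files this tool generated
                continue
            yield os.path.join(dir_path, name)
```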
## Feedback
@@ -43,10 +65,7 @@ Feel free to open an issue if you have any questions, suggestions, or problems.
## Todo (PR Welcome!)
- [ ] Write-up
- [ ] Multi-platform pyarmor_runtime executable
- [ ] Accept more input forms
- [ ] Tests for different Pyarmor and Python versions
- [ ] Support more obfuscating options
- [ ] Use asyncio for concurrency
- [ ] Pyarmor 7 and before (Later or never.)

157
helpers/detect.py Normal file
View File

@@ -0,0 +1,157 @@
import logging
import os
from typing import List, Tuple
def ascii_ratio(data: bytes) -> float:
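    """Fraction of printable ASCII bytes in data, used as a rough text-vs-binary heuristic."""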
return sum(32 <= c < 127 for c in data) / len(data)
def source_as_file(file_path: str) -> List[bytes] | None:
try:
with open(file_path, 'r') as f:
co = compile(f.read(), '<str>', 'exec')
data = [i for i in co.co_consts if type(i) is bytes
and i.startswith(b'PY00') and len(i) > 64]
return data
except:
return None
def source_as_lines(file_path: str) -> List[bytes] | None:
data = []
try:
with open(file_path, 'r') as f:
for line in f:
try:
co = compile(line, '<str>', 'exec')
data.extend([i for i in co.co_consts if type(i) is bytes
and i.startswith(b'PY00') and len(i) > 64])
except:
# ignore not compilable lines
pass
except:
return None
return data
def find_data_from_bytes(data: bytes, max_count=-1) -> List[bytes]:
result = []
idx = 0
while len(result) != max_count:
idx = data.find(b'PY00')
if idx == -1:
break
data = data[idx:]
if len(data) < 64:
break
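        # "PY00" object layout (little-endian): bytes 28..31 hold the header
        # length and bytes 32..35 the body length; see the reads below.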
header_len = int.from_bytes(data[28:32], 'little')
body_len = int.from_bytes(data[32:36], 'little')
if header_len > 256 or body_len > 0xFFFFF or header_len + body_len > len(data):
# compressed or a coincidental match, skip
data = data[5:]
continue
result.append(data[:header_len + body_len])
# maybe followed by data for other Python versions from the same file,
# we do not extract them
followed_by_another_equivalent = int.from_bytes(
data[56:60], 'little') != 0
data = data[header_len + body_len:]
while followed_by_another_equivalent \
and data.startswith(b'PY00') \
and len(data) >= 64:
header_len = int.from_bytes(data[28:32], 'little')
body_len = int.from_bytes(data[32:36], 'little')
followed_by_another_equivalent = int.from_bytes(
data[56:60], 'little') != 0
data = data[header_len + body_len:]
return result
def nuitka_package(head: bytes, relative_path: str) -> None | List[Tuple[str, bytes]]:
first_occurrence = head.find(b'PY00')
if first_occurrence == -1:
return None
last_dot_bytecode = head.rfind(b'.bytecode\x00', 0, first_occurrence)
if last_dot_bytecode == -1:
return None
length = int.from_bytes(
head[last_dot_bytecode-4:last_dot_bytecode], 'little')
end = last_dot_bytecode + length
cur = last_dot_bytecode
result = []
while cur < end:
module_name_len = head.find(b'\x00', cur, end) - cur
module_name = head[cur:cur + module_name_len].decode('utf-8')
cur += module_name_len + 1
module_len = int.from_bytes(head[cur:cur + 4], 'little')
cur += 4
module_data = find_data_from_bytes(head[cur:cur + module_len], 1)
if module_data:
result.append((os.path.join(relative_path.rstrip(
'/\\') + '.1shot.ext', module_name), module_data[0]))
cur += module_len
if result:
logger = logging.getLogger('detect')
logger.info(f'Found data in Nuitka package: {relative_path}')
return result
return None
def detect_process(file_path: str, relative_path: str) -> None | List[Tuple[str, bytes]]:
'''
Returns a list of (relative_path, bytes_raw) tuples, or None.
Do not raise exceptions.
'''
logger = logging.getLogger('detect')
try:
with open(file_path, 'rb') as f:
head = f.read(16 * 1024 * 1024)
except:
logger.error(f'Failed to read file: {relative_path}')
return None
if b'__pyarmor__' not in head:
# no need to dig deeper
return None
if ascii_ratio(head[:2048]) >= 0.9:
# the whole file may not compile, but we can still try individual lines;
# None means failure (so we make another attempt),
# an empty list means success but no data found (so we skip this file)
result = source_as_file(file_path)
if result is None:
result = source_as_lines(file_path)
if result is None:
return None
match len(result):
case 0:
return None
case 1:
logger.info(f'Found data in source: {relative_path}')
return [(relative_path, result[0])]
case _:
logger.info(f'Found data in source: {relative_path}')
return [(f'{relative_path}__{i}', result[i]) for i in range(len(result))]
# binary file
# ignore data beyond 16 MB until we have a reason to read more
if b'Error, corrupted constants object' in head:
# an interesting special case: packer put armored data in a Nuitka package
# we can know the exact module names, instead of adding boring __0, __1, ...
return nuitka_package(head, relative_path)
result = find_data_from_bytes(head)
match len(result):
case 0:
return None
case 1:
logger.info(f'Found data in binary: {relative_path}')
return [(relative_path, result[0])]
case _:
logger.info(f'Found data in binary: {relative_path}')
return [(f'{relative_path}__{i}', result[i]) for i in range(len(result))]
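For reference, a minimal driver for this helper could look like the sketch below (an assumption: it is run next to `detect.py`; the real entry point is `helpers/shot.py`):

``` python
import sys

from detect import detect_process

def dump_armored(path: str) -> None:
    """Write every armored blob detect_process finds in `path` to its own file."""
    found = detect_process(path, path)
    if not found:
        print('no armored data found')
        return
    for relative_path, blob in found:
        out_name = relative_path.replace('/', '_').replace('\\', '_') + '.bin'
        with open(out_name, 'wb') as f:
            f.write(blob)
        print(f'wrote {len(blob)} bytes to {out_name}')

if __name__ == '__main__':
    dump_armored(sys.argv[1])
```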

View File

@@ -5,6 +5,7 @@ import os
import subprocess
from typing import Dict, List, Tuple
from detect import detect_process
from runtime import RuntimeInfo
@@ -157,6 +158,7 @@ def main():
if args.runtime:
specified_runtime = RuntimeInfo(args.runtime)
print(specified_runtime)
runtimes = {specified_runtime.serial_number: specified_runtime}
else:
specified_runtime = None
@@ -167,23 +169,50 @@ def main():
if args.output_dir and not os.path.exists(args.output_dir):
os.makedirs(args.output_dir)
if os.path.isfile(args.directory):
if specified_runtime is None:
logger.error('Please specify `pyarmor_runtime` file by `-r` if input is a file')
return
logger.info('Single file mode')
result = detect_process(args.directory, args.directory)
if result is None:
logger.error('No armored data found')
return
sequences.extend(result)
decrypt_process(runtimes, sequences, args)
return # single file mode ends here
dir_path: str
dirs: List[str]
files: List[str]
for dir_path, dirs, files in os.walk(args.directory, followlinks=False):
if '.no1shot' in files:
logger.info(f'Skipping {dir_path} because of `.no1shot`')
dirs.clear()
files.clear()
continue
for d in ['__pycache__', 'site-packages']:
if d in dirs:
dirs.remove(d)
for file_name in files:
if '.1shot.' in file_name:
continue
handled = False
file_path = os.path.join(dir_path, file_name)
relative_path = os.path.relpath(file_path, args.directory)
if file_name.endswith('.pyz'):
with open(file_path, 'rb') as f:
head = f.read(16 * 1024 * 1024)
if b'PY00' in head \
and (not os.path.exists(file_path + '_extracted')
or len(os.listdir(file_path + '_extracted')) == 0):
logger.error(
f'A PYZ file containing armored data was detected, but it has not been extracted by other tools. This is not a problem with this tool. If the folder was extracted by Pyinstxtractor, please read the output of Pyinstxtractor carefully. ({relative_path})')
continue
# is pyarmor_runtime?
if not handled \
and specified_runtime is None \
if specified_runtime is None \
and file_name.startswith('pyarmor_runtime') \
and file_name.endswith(('.pyd', '.so', '.dylib')):
try:
@@ -192,40 +221,13 @@ def main():
logger.info(
f'Found new runtime: {new_runtime.serial_number} ({file_path})')
print(new_runtime)
handled = True
continue
except:
pass
try:
with open(file_path, 'rb') as f:
beacon = f.read(16 * 1024 * 1024)
except:
logger.error(f'Failed to read file: {relative_path}')
continue
# is UTF-8 source?
# TODO: only support natural one line now
if not handled and b'__pyarmor__(__name__, __file__,' in beacon:
try:
with open(file_path, 'r') as f:
for line in f:
if line.startswith('__pyarmor__(') and line.rstrip().endswith(')'):
co = compile(line, '<str>', 'exec')
bytes_raw = co.co_consts[0]
assert type(bytes_raw) is bytes
assert bytes_raw.startswith(b'PY')
assert len(bytes_raw) > 64
break
logger.info(f'Found data in source: {relative_path}')
# FIXME: bytes_raw can be kept from last iteration
sequences.append((relative_path, bytes_raw))
del bytes_raw
handled = True
except Exception as e:
logger.error(f'Assume source, but {e} ({file_path})')
# TODO: is Nuitka package?
# TODO: is pyc or single marshalled binary?
result = detect_process(file_path, relative_path)
if result is not None:
sequences.extend(result)
if not runtimes:
logger.error('No runtime found')

View File

@@ -86,6 +86,9 @@ int main(int argc, char* argv[])
return 1;
}
das_out_file.flush();
das_out_file.close();
dc_out_file << "# Source Generated with Decompyle++\n";
formatted_print(dc_out_file, "# File: %s (Python %d.%d%s)\n\n", dispname,
mod.majorVer(), mod.minorVer(),
@@ -97,5 +100,8 @@ int main(int argc, char* argv[])
return 1;
}
dc_out_file.flush();
dc_out_file.close();
return 0;
}