wip: extract data

This commit is contained in:
2025-02-27 14:01:14 +08:00
parent 5e1c4037a9
commit 79e22e193a
5 changed files with 319 additions and 0 deletions

48
README.md Normal file
View File

@@ -0,0 +1,48 @@
# Pyarmor-Static-Unpack-1shot
🚧 **Work in progress**
Generally this project aims to statically convert (without executing) armored data - which can be regarded as an encrypted variant of pyc files - back to disassembly and (experimentally) source code. Therefore we forked the awesome [Decompyle++](https://github.com/zrax/pycdc) (aka pycdc).
Currently we are trying to support Pyarmor 8.0 through the latest release (9.0.8), Python 3.6 - 3.13, on Windows, Linux, macOS, and Android, with as many obfuscation options as possible. (However, we have only limited tests.)
We cannot wait to make it public. A detailed write-up will be available soon. For those who are curious, in the meantime you can check out [the similar work of G DATA Advanced Analytics](https://cyber.wtf/2025/02/12/unpacking-pyarmor-v8-scripts/).
## Build
``` bash
mkdir build
cd build
cmake ..
cmake --build .
mv pyarmor-1shot[.exe] ../helpers
```
## Usage
Make sure the executable `pyarmor-1shot` exists in the `helpers` directory, then run `helpers/shot.py` with Python 3 (it does not need to be the same version as the one used by the obfuscated scripts), passing the "root" directory of the obfuscated scripts. It will recursively find and handle `pyarmor_runtime` and as much armored data as possible. For example:
``` bash
$ ls /path/to/scripts
__pycache__ pyarmor_runtime_000000 obf_main.py plain_src.py util.pyc packed.so folder_with_other_scripts readme.unrelated
$ python /path/to/helpers/shot.py /path/to/scripts
```
When necessary, specify a `pyarmor_runtime` executable with `-r path/to/pyarmor_runtime[.pyd|.so|.dylib]`.
All files generated by this tool have `.1shot.` in their file names. If you want to save them in another directory instead of in-place, use `-o another/path/`. The folder structure will remain unchanged.
Note:
- Subdirectories called `__pycache__` or `site-packages` will not be touched, and symbolic links will not be followed, to avoid duplicate work and infinite loops and to save time. If you really need them, run the script again later on these directories (as the "root" directory) and specify the runtime.
- Archives, executables generated by PyInstaller and so on, must be unpacked by other tools before decrypting, or you will encounter undefined behavior.
## Todo (PR Welcome!)
- [ ] Write-up
- [ ] Multi-platform pyarmor_runtime executable
- [ ] Accept more input forms
- [ ] Tests for different Pyarmor and Python versions
- [ ] Support more obfuscating options
- [ ] Use asyncio for concurrency
- [ ] Pyarmor 7 and before (Later or never.)

0
helpers/__init__.py Normal file
View File

93
helpers/runtime.py Normal file
View File

@@ -0,0 +1,93 @@
import hashlib
# Fixed byte string that is appended to the three runtime-specific parts when
# the runtime AES key is derived (see `RuntimeInfo.calc_aes_key`).
# NOTE(review): the bytes look like a DER-encoded RSA public key (SEQUENCE
# header `30 82 01 0a`, trailing INTEGER `01 00 01`) -- confirm the origin.
GLOBAL_CERT = bytes.fromhex('''
30 82 01 0a 02 82 01 01 00 bf 65 30 f3 bd 67 e7
a6 9d f8 db 18 b2 b9 c1 c0 5f fe fb e5 4b 91 df
6f 38 da 51 cc ea c4 d3 04 bd 95 27 86 c1 13 ca
73 15 44 4d 97 f5 10 b9 52 21 72 16 c8 b2 84 5f
45 56 32 e7 c2 6b ad 2b d9 df 52 d6 e9 d1 2a ba
35 e4 43 ab 54 e7 91 c5 ce d1 f1 ba a5 9f f4 ca
db 89 04 3d f8 9f 6a 8b 8a 29 39 f8 4c 0d b8 a0
6d 51 c4 74 24 64 fe 1a 23 97 f3 61 ea de c8 97
dc 57 60 34 be 2c 18 50 3b d1 76 3b 49 2a 39 9a
37 18 53 8f 1d 4c 82 b1 a0 33 43 57 19 ad 67 e7
af 09 fb 04 54 a9 ea c0 c1 e9 32 6c 77 92 7f 9f
7c 08 7c e8 a1 5d a4 fc 40 e6 6e 18 db bf 45 53
4b 5c a7 9d f2 8f 7e 6c 04 b0 4d ee 99 25 9a 87
84 6e 9e fe 3c 72 ec b0 64 dd 2e db ad 32 fa 1d
4b 2c 1a 78 85 7c bc 2c d0 d7 83 77 5f 92 d5 db
59 10 96 53 2e 5d c7 42 12 b8 61 cb 2c 5f 46 14
9e 93 b0 53 21 a2 74 34 2d 02 03 01 00 01
''')
class RuntimeInfo:
    '''
    Key material read from a `pyarmor_runtime` binary.

    After construction the object carries the three raw byte parts found in
    the file (`part_1`, `part_2`, `part_3`), the serial number taken from
    `part_1`, and the AES key derived from all parts.
    '''

    def __init__(self, file_path: str) -> None:
        '''
        Read `file_path` and derive the serial number and the AES key.
        '''
        self.file_path = file_path
        is_pyd = file_path.endswith('.pyd')
        if is_pyd:
            self.extract_info_win64()
        else:
            # TODO: implement for other platforms
            # Until then the win64 extractor is used for every file type.
            self.extract_info_win64()
        # Bytes 12..18 of the first part hold the serial number as text.
        self.serial_number = self.part_1[12:18].decode()
        self.runtime_aes_key = self.calc_aes_key()

    def __str__(self) -> str:
        '''
        Return a multi-line, human-readable summary of the runtime.
        '''
        label = 'Trial' if self.serial_number == '000000' else self.serial_number
        # The product name is the leading run of printable ASCII characters
        # that follows the first two bytes of part 3.
        printable = bytearray()
        for byte in self.part_3[2:]:
            if not 32 <= byte <= 126:
                break
            printable.append(byte)
        product = printable.decode('ascii')
        rule = '========================'
        return '\n'.join((
            rule,
            f'PyArmor Runtime ({label}) Information:',
            f'Product: {product}',
            f'AES key: {self.runtime_aes_key.hex()}',
            f'Mix string AES nonce: {self.mix_str_aes_nonce().hex()}',
            rule,
        ))

    def __repr__(self) -> str:
        '''
        Return a debug representation showing the three raw parts.
        '''
        return f'RuntimeInfo(part_1={self.part_1}, part_2={self.part_2}, part_3={self.part_3})'

    def extract_info_win64(self) -> None:
        '''
        Try to find useful information from `pyarmor_runtime.pyd` file,
        and store all three parts in the object.

        Only the first 16 MiB of the file are examined. A `ValueError` is
        raised by `bytes.index` when the `pyarmor-vax` marker is absent.
        '''
        with open(self.file_path, 'rb') as runtime_file:
            blob = runtime_file.read(16 * 1024 * 1024)

        def u32(offset: int) -> int:
            # Little-endian 32-bit field at `offset`.
            return int.from_bytes(blob[offset:offset + 4], 'little')

        marker = blob.index(b'pyarmor-vax')
        self.part_1 = blob[marker:marker + 20]

        # Three 32-bit fields start 36 bytes after the marker.
        fields = marker + 36
        part_2_offset = u32(fields)
        part_2_len = u32(fields + 4)
        part_3_offset = u32(fields + 8)

        # Both offsets are relative to the end of that 16-byte field area.
        base = fields + 16
        part_2_start = base + part_2_offset
        self.part_2 = blob[part_2_start:part_2_start + part_2_len]

        # Part 3 has its own 32-byte header with the length at +4.
        part_3_header = base + part_3_offset
        part_3_len = u32(part_3_header + 4)
        part_3_start = part_3_header + 32
        self.part_3 = blob[part_3_start:part_3_start + part_3_len]

    def calc_aes_key(self) -> bytes:
        '''
        Return the MD5 digest of part 1, part 2, part 3 and `GLOBAL_CERT`,
        fed to the hash in that order.
        '''
        digest = hashlib.md5()
        for chunk in (self.part_1, self.part_2, self.part_3, GLOBAL_CERT):
            digest.update(chunk)
        return digest.digest()

    def mix_str_aes_nonce(self) -> bytes:
        '''
        Return the first 12 bytes of part 3.
        '''
        nonce_length = 12
        return self.part_3[:nonce_length]
if __name__ == '__main__':
    # Command-line entry point: print the extracted information of every
    # runtime file passed as an argument.
    import sys
    if len(sys.argv) < 2:
        print('Usage: python runtime.py path/to/pyarmor_runtime[.pyd|.so|.dylib]')
        # The bare `exit` name is injected by the `site` module for
        # interactive use and can be missing (e.g. `python -S`, frozen
        # builds); `sys.exit` is always available.
        sys.exit(1)
    for runtime_path in sys.argv[1:]:
        print(RuntimeInfo(runtime_path))

178
helpers/shot.py Normal file
View File

@@ -0,0 +1,178 @@
import argparse
from Crypto.Cipher import AES
import logging
import os
import subprocess
from runtime import RuntimeInfo
# Timeout, in seconds, passed to `subprocess.run` for each invocation of the
# `pyarmor-1shot` helper executable.
SUBPROCESS_TIMEOUT = 30
def general_aes_ctr_decrypt(data: bytes, key: bytes, nonce: bytes) -> bytes:
    '''
    Decrypt `data` with AES in CTR mode and return the plain bytes.

    The cipher is created from `key` and `nonce` with the counter starting
    at 2 (`initial_value=2`).
    '''
    return AES.new(key, AES.MODE_CTR, nonce=nonce, initial_value=2).decrypt(data)
def decrypt_process(runtimes: dict[str, 'RuntimeInfo'], sequences: list[tuple[str, bytes]], output_dir: str = None):
    '''
    Decrypt every armored sequence and pass it to the `pyarmor-1shot` helper.

    For each `(path, data)` pair two files are written beside the
    destination path: `<path>.1shot.raw` (the data as found) and
    `<path>.1shot.seq` (key material, the plain header, the decrypted body
    and whatever follows the body). The helper executable located in the
    directory of this script is then run on the `.seq` file and its output
    is forwarded to the `shot` logger.

    runtimes: runtime information keyed by serial number.
    sequences: `(relative path, armored bytes)` pairs.
    output_dir: directory the relative paths are joined to; when it is
        empty or None the paths are used unchanged.

    A failure on one sequence is logged and does not stop the remaining ones.
    '''
    logger = logging.getLogger('shot')

    # The helper lives next to this script; its location does not depend on
    # the sequence, so resolve it once.
    exe_name = 'pyarmor-1shot.exe' if os.name == 'nt' else 'pyarmor-1shot'
    exe_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), exe_name)

    for path, data in sequences:
        try:
            # Bytes 2..8 of the armored data carry the serial number that
            # selects the matching runtime.
            serial_number = data[2:8].decode('utf-8')
            runtime = runtimes.get(serial_number)
            if runtime is None:
                logger.error(
                    f'Decrypt failed: no runtime found for serial number {serial_number!r} ({path})')
                continue
            logger.info(f'Decrypting: {serial_number} ({path})')

            dest_path = os.path.join(output_dir, path) if output_dir else path
            dest_dir = os.path.dirname(dest_path)
            # A bare file name has an empty directory part, and
            # `os.makedirs('')` raises; `exist_ok` also removes the
            # check-then-create race.
            if dest_dir:
                os.makedirs(dest_dir, exist_ok=True)

            with open(dest_path + '.1shot.raw', 'wb') as f:
                f.write(data)

            # Header fields (little-endian): body offset at 28, body length
            # at 32; the 12-byte nonce is bytes 36..40 followed by 44..52.
            cipher_text_offset = int.from_bytes(data[28:32], 'little')
            cipher_text_length = int.from_bytes(data[32:36], 'little')
            cipher_text_end = cipher_text_offset + cipher_text_length
            nonce = data[36:40] + data[44:52]

            with open(dest_path + '.1shot.seq', 'wb') as f:
                f.write(b'\xa1' + runtime.runtime_aes_key)
                f.write(b'\xa2' + runtime.mix_str_aes_nonce())
                f.write(b'\xf0\xf0')
                f.write(data[:cipher_text_offset])
                f.write(general_aes_ctr_decrypt(
                    data[cipher_text_offset:cipher_text_end], runtime.runtime_aes_key, nonce))
                f.write(data[cipher_text_end:])

            # TODO: multi process
            sp = subprocess.run(
                [
                    exe_path,
                    dest_path + '.1shot.seq',
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=SUBPROCESS_TIMEOUT,
            )
            # Decode leniently: one undecodable byte in the helper output
            # must not discard everything it reported.
            stdout = sp.stdout.decode(errors='replace').splitlines()
            stderr = sp.stderr.decode(errors='replace').splitlines()
            for line in stdout:
                logger.warning(f'STDOUT {line} ({path})')
            for line in stderr:
                if line.startswith('Warning'):
                    logger.warning(f'STDERR {line} ({path})')
                else:
                    logger.error(f'STDERR {line} ({path})')
            # A crash of the helper may leave no output at all; report it.
            if sp.returncode != 0:
                logger.error(
                    f'Helper exited with code {sp.returncode} ({path})')
        except Exception as e:
            logger.error(f'Decrypt failed: {e} ({path})')
            continue
def parse_args():
    '''
    Parse the command line of `shot.py` and return the argparse namespace
    with the attributes `directory`, `runtime` and `output_dir`.
    '''
    cli = argparse.ArgumentParser(
        description='Pyarmor Static Unpack 1 Shot Entry')
    # (flags, keyword settings) for every supported argument.
    argument_table = (
        (
            ('directory',),
            {
                'help': 'the "root" directory of obfuscated scripts',
                'type': str,
            },
        ),
        (
            ('-r', '--runtime'),
            {
                'help': 'path to pyarmor_runtime[.pyd|.so|.dylib]',
                # Kept as a path string rather than argparse.FileType('rb').
                'type': str,
            },
        ),
        (
            ('-o', '--output-dir'),
            {
                'help': 'save output files in another directory instead of in-place, with folder structure remain unchanged',
                'type': str,
            },
        ),
    )
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
def _extract_armored_bytes(file_path: str) -> bytes:
    '''
    Return the armored byte string embedded in a source file.

    The first line that starts with `__pyarmor__(` and ends with `)` is
    compiled (never executed) and its first constant is taken as the data.
    Raises `ValueError` when no such line exists or the constant is not
    plausible armored data; `compile` may raise `SyntaxError`.
    '''
    # TODO: only support natural one line now
    # Read as UTF-8 regardless of the locale; undecodable bytes elsewhere in
    # the file are replaced instead of aborting the scan of this file.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            if line.startswith('__pyarmor__(') and line.rstrip().endswith(')'):
                co = compile(line, '<str>', 'exec')
                bytes_raw = co.co_consts[0]
                # Explicit checks instead of `assert`, which is stripped
                # when Python runs with -O.
                if not isinstance(bytes_raw, bytes):
                    raise ValueError('first constant is not a bytes literal')
                if not bytes_raw.startswith(b'PY'):
                    raise ValueError('data does not start with the PY magic')
                if len(bytes_raw) <= 64:
                    raise ValueError('data is too short')
                return bytes_raw
    raise ValueError('no one-line __pyarmor__(...) call found')


def main():
    '''
    Entry point of `shot.py`.

    Walks the given directory, collects runtime files (unless one was
    specified with `-r`) and armored data embedded in source files, then
    passes everything to `decrypt_process`.
    '''
    args = parse_args()
    logging.basicConfig(
        level=logging.INFO,
        format='%(levelname)-8s %(asctime)-28s %(message)s',
    )
    logger = logging.getLogger('shot')

    if args.runtime:
        specified_runtime = RuntimeInfo(args.runtime)
        runtimes = {specified_runtime.serial_number: specified_runtime}
    else:
        specified_runtime = None
        runtimes = {}

    sequences: list[tuple[str, bytes]] = []

    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)

    dir_path: str
    dirs: list[str]
    files: list[str]
    for dir_path, dirs, files in os.walk(args.directory, followlinks=False):
        # Prune in place so that os.walk does not descend into them.
        for d in ['__pycache__', 'site-packages']:
            if d in dirs:
                dirs.remove(d)

        for file_name in files:
            # Skip files produced by an earlier run of this tool.
            if '.1shot.' in file_name:
                continue

            file_path = os.path.join(dir_path, file_name)
            relative_path = os.path.relpath(file_path, args.directory)

            # is pyarmor_runtime?
            if specified_runtime is None \
                    and file_name.startswith('pyarmor_runtime') \
                    and not file_name.endswith(('.lnk', '.i64', '.idb', '.id0', '.id1',
                                                '.id2', '.nam', '.til', '.bak')):
                try:
                    new_runtime = RuntimeInfo(file_path)
                    runtimes[new_runtime.serial_number] = new_runtime
                    logger.info(
                        f'Found new runtime: {new_runtime.serial_number} ({file_path})')
                    print(new_runtime)
                    continue
                except Exception as e:
                    # Best effort: the file merely looked like a runtime.
                    # Report it and fall through to the other checks.
                    logger.warning(
                        f'Not a usable runtime: {e} ({file_path})')

            try:
                with open(file_path, 'rb') as f:
                    beacon = f.read(16 * 1024 * 1024)
            except OSError as e:
                # e.g. dangling symlink or missing permission; one
                # unreadable file must not abort the whole scan.
                logger.error(f'Cannot read file: {e} ({file_path})')
                continue

            # is UTF-8 source?
            if b'__pyarmor__(__name__, __file__,' in beacon:
                try:
                    bytes_raw = _extract_armored_bytes(file_path)
                except Exception as e:
                    logger.error(f'Assume source, but {e} ({file_path})')
                else:
                    logger.info(f'Found data in source: {relative_path}')
                    sequences.append((relative_path, bytes_raw))
                continue

            # TODO: is Nuitka package?
            # TODO: is pyc or single marshalled binary?

    decrypt_process(runtimes, sequences, args.output_dir or args.directory)


if __name__ == '__main__':
    main()