wip: extract data
This commit is contained in:
48
README.md
Normal file
48
README.md
Normal file
@@ -0,0 +1,48 @@
|
||||
# Pyarmor-Static-Unpack-1shot
|
||||
|
||||
🚧 **Work in progress**
|
||||
|
||||
Generally this project aims to statically convert (without executing) armored data - which can be regarded as an encrypted variant of pyc files - back to disassembly and (experimentally) source code. Therefore we forked the awesome [Decompyle++](https://github.com/zrax/pycdc) (aka pycdc).
|
||||
|
||||
Currently we are trying to support Pyarmor 8.0 through the latest release (9.0.8), Python 3.6 - 3.13, and platforms including Windows, Linux, macOS, and Android, with as many obfuscating options as possible. (However, testing so far is limited.)
|
||||
|
||||
We cannot wait to make it public. A detailed write-up will be available soon. In the meantime, curious readers can check out [the similar work of G DATA Advanced Analytics](https://cyber.wtf/2025/02/12/unpacking-pyarmor-v8-scripts/).
|
||||
|
||||
## Build
|
||||
|
||||
``` bash
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cmake --build .
|
||||
mv pyarmor-1shot[.exe] ../helpers
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Make sure the executable `pyarmor-1shot` exists in the `helpers` directory, then run `helpers/shot.py` with Python 3 (it does not need to be the same version as the one used by the obfuscated scripts), passing the "root" directory of the obfuscated scripts. It will recursively find and handle `pyarmor_runtime` and as much armored data as possible. For example:
|
||||
|
||||
``` bash
|
||||
$ ls /path/to/scripts
|
||||
__pycache__ pyarmor_runtime_000000 obf_main.py plain_src.py util.pyc packed.so folder_with_other_scripts readme.unrelated
|
||||
$ python /path/to/helpers/shot.py /path/to/scripts
|
||||
```
|
||||
|
||||
When necessary, specify a `pyarmor_runtime` executable with `-r path/to/pyarmor_runtime[.pyd|.so|.dylib]`.
|
||||
|
||||
All files generated by this tool have `.1shot.` in their file names. If you want to save them in another directory instead of in-place, use `-o another/path/`. The folder structure will remain unchanged.
|
||||
|
||||
Note:
|
||||
|
||||
- Subdirectories called `__pycache__` or `site-packages` will not be touched, and symbolic links will not be followed, to avoid repeated work or infinite loops and to save time. If you really need them, run the script again on these directories (as the "root" directory) and specify the runtime.
|
||||
- Archives, executables generated by PyInstaller and so on, must be unpacked by other tools before decrypting, or you will encounter undefined behavior.
|
||||
|
||||
## Todo (PR Welcome!)
|
||||
|
||||
- [ ] Write-up
|
||||
- [ ] Multi-platform pyarmor_runtime executable
|
||||
- [ ] Accept more input forms
|
||||
- [ ] Tests for different Pyarmor and Python versions
|
||||
- [ ] Support more obfuscating options
|
||||
- [ ] Use asyncio for concurrency
|
||||
- [ ] Pyarmor 7 and before (Later or never.)
|
0
helpers/__init__.py
Normal file
0
helpers/__init__.py
Normal file
93
helpers/runtime.py
Normal file
93
helpers/runtime.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import hashlib
|
||||
|
||||
|
||||
# Fixed 270-byte blob that is appended to the three runtime parts before
# hashing (see RuntimeInfo.calc_aes_key).
# NOTE(review): the bytes start with a DER SEQUENCE header (30 82 01 0a) and
# end with 02 03 01 00 01, which looks like an RSA public key with exponent
# 65537 -- confirm the origin before relying on that interpretation.
# bytes.fromhex() skips the whitespace and newlines between the byte pairs.
GLOBAL_CERT = bytes.fromhex('''
30 82 01 0a 02 82 01 01 00 bf 65 30 f3 bd 67 e7
a6 9d f8 db 18 b2 b9 c1 c0 5f fe fb e5 4b 91 df
6f 38 da 51 cc ea c4 d3 04 bd 95 27 86 c1 13 ca
73 15 44 4d 97 f5 10 b9 52 21 72 16 c8 b2 84 5f
45 56 32 e7 c2 6b ad 2b d9 df 52 d6 e9 d1 2a ba
35 e4 43 ab 54 e7 91 c5 ce d1 f1 ba a5 9f f4 ca
db 89 04 3d f8 9f 6a 8b 8a 29 39 f8 4c 0d b8 a0
6d 51 c4 74 24 64 fe 1a 23 97 f3 61 ea de c8 97
dc 57 60 34 be 2c 18 50 3b d1 76 3b 49 2a 39 9a
37 18 53 8f 1d 4c 82 b1 a0 33 43 57 19 ad 67 e7
af 09 fb 04 54 a9 ea c0 c1 e9 32 6c 77 92 7f 9f
7c 08 7c e8 a1 5d a4 fc 40 e6 6e 18 db bf 45 53
4b 5c a7 9d f2 8f 7e 6c 04 b0 4d ee 99 25 9a 87
84 6e 9e fe 3c 72 ec b0 64 dd 2e db ad 32 fa 1d
4b 2c 1a 78 85 7c bc 2c d0 d7 83 77 5f 92 d5 db
59 10 96 53 2e 5d c7 42 12 b8 61 cb 2c 5f 46 14
9e 93 b0 53 21 a2 74 34 2d 02 03 01 00 01
''')
|
||||
|
||||
|
||||
class RuntimeInfo:
    '''
    Key material extracted from a `pyarmor_runtime` binary.

    Attributes set by the constructor:
        file_path: path of the binary that was parsed.
        part_1, part_2, part_3: raw byte ranges located relative to the
            `pyarmor-vax` marker inside the binary.
        serial_number: bytes 12..18 of `part_1`, decoded as text.
        runtime_aes_key: MD5 digest over the three parts plus GLOBAL_CERT.
    '''

    # Marker that anchors every offset used by the extractor.
    _MARKER = b'pyarmor-vax'
    # Only this many leading bytes of the file are read and searched.
    _MAX_READ = 16 * 1024 * 1024

    def __init__(self, file_path: str) -> None:
        '''
        Parse `file_path` and derive the serial number and AES key.

        Raises ValueError when the marker is missing or the data is
        truncated, and OSError when the file cannot be read.
        '''
        self.file_path = file_path
        # TODO: implement for other platforms. Until then every file type
        # (.pyd, .so, .dylib, ...) goes through the same extractor.
        self.extract_info_win64()

        self.serial_number = self.part_1[12:18].decode()
        self.runtime_aes_key = self.calc_aes_key()

    def __str__(self) -> str:
        '''
        Return a human-readable, multi-line summary of the runtime.
        '''
        trial = self.serial_number == '000000'
        # The product name is the run of printable ASCII bytes that starts
        # at offset 2 of part 3; stop at the first non-printable byte.
        chars = []
        for c in self.part_3[2:]:
            if not 32 <= c <= 126:
                break
            chars.append(chr(c))
        product = ''.join(chars)
        return f'''\
========================
PyArmor Runtime ({'Trial' if trial else self.serial_number}) Information:
Product: {product}
AES key: {self.runtime_aes_key.hex()}
Mix string AES nonce: {self.mix_str_aes_nonce().hex()}
========================'''

    def __repr__(self) -> str:
        return f'RuntimeInfo(part_1={self.part_1}, part_2={self.part_2}, part_3={self.part_3})'

    def extract_info_win64(self) -> None:
        '''
        Try to find useful information from `pyarmor_runtime.pyd` file,
        and store all three parts in the object.

        Raises ValueError if the `pyarmor-vax` marker is not found or if
        any field would extend past the data that was read; a silently
        shortened slice would otherwise yield a wrong AES key.
        '''
        with open(self.file_path, 'rb') as f:
            data = f.read(self._MAX_READ)

        cur = data.find(self._MARKER)
        if cur < 0:
            raise ValueError(
                f'marker {self._MARKER!r} not found in {self.file_path}')

        def _slice(start: int, length: int, what: str) -> bytes:
            # Bounds-checked replacement for data[start:start+length].
            chunk = data[start:start + length]
            if len(chunk) != length:
                raise ValueError(f'{what} is truncated in {self.file_path}')
            return chunk

        def _u32(start: int, what: str) -> int:
            # All header fields are 4-byte little-endian unsigned integers.
            return int.from_bytes(_slice(start, 4, what), 'little')

        self.part_1 = _slice(cur, 20, 'part 1')

        # The offset/length table sits 36 bytes after the marker.
        cur += 36
        part_2_offset = _u32(cur, 'part 2 offset')
        part_2_len = _u32(cur + 4, 'part 2 length')
        part_3_offset = _u32(cur + 8, 'part 3 offset')
        # Both offsets are relative to the end of the 16-byte table.
        cur += 16
        self.part_2 = _slice(cur + part_2_offset, part_2_len, 'part 2')

        cur += part_3_offset
        part_3_len = _u32(cur + 4, 'part 3 length')
        # Part 3 data follows its own 32-byte header.
        cur += 32
        self.part_3 = _slice(cur, part_3_len, 'part 3')

    def calc_aes_key(self) -> bytes:
        '''
        Return the MD5 digest of part 1 + part 2 + part 3 + GLOBAL_CERT.
        '''
        return hashlib.md5(self.part_1 + self.part_2 + self.part_3 + GLOBAL_CERT).digest()

    def mix_str_aes_nonce(self) -> bytes:
        '''
        Return the first 12 bytes of part 3.
        '''
        return self.part_3[:12]
|
||||
|
||||
|
||||
# Command-line entry: print the extracted information of every runtime
# binary given on the command line.
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        print('Usage: python runtime.py path/to/pyarmor_runtime[.pyd|.so|.dylib]')
        # sys.exit() rather than the bare exit(): the latter is injected by
        # the `site` module and is missing under `python -S` or in frozen
        # builds.
        sys.exit(1)
    for runtime_path in sys.argv[1:]:
        print(RuntimeInfo(runtime_path))
|
178
helpers/shot.py
Normal file
178
helpers/shot.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import argparse
|
||||
from Crypto.Cipher import AES
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from runtime import RuntimeInfo
|
||||
|
||||
|
||||
# Seconds allowed for each `pyarmor-1shot` helper invocation; passed to
# subprocess.run() as `timeout` in decrypt_process().
SUBPROCESS_TIMEOUT = 30
|
||||
|
||||
|
||||
def general_aes_ctr_decrypt(data: bytes, key: bytes, nonce: bytes) -> bytes:
    '''
    Decrypt `data` with AES in CTR mode using `key` and `nonce`.

    The counter starts at 2 (`initial_value=2`) instead of the library
    default.
    '''
    return AES.new(key, AES.MODE_CTR, nonce=nonce, initial_value=2).decrypt(data)
|
||||
|
||||
|
||||
def decrypt_process(runtimes: 'dict[str, RuntimeInfo]', sequences: 'list[tuple[str, bytes]]', output_dir: 'str | None' = None):
    '''
    Decrypt every armored blob and hand the result to the helper executable.

    For each `(path, data)` pair in `sequences` this writes two files next
    to the destination path:
      - `<path>.1shot.raw`: the untouched armored data;
      - `<path>.1shot.seq`: a small header (AES key, mix-string nonce) and
        the data with its encrypted range replaced by the decrypted bytes.
    It then runs `pyarmor-1shot[.exe]` (located beside this script) on the
    `.seq` file and forwards its output to the `shot` logger.

    Parameters:
        runtimes: runtime information keyed by serial number.
        sequences: pairs of relative path and armored bytes.
        output_dir: directory the paths are joined to; when empty or None
            the paths are used as they are.

    A failure on one item is logged and does not stop the remaining items.
    The annotations are quoted so that the signature is not evaluated at
    definition time (subscripted builtins need Python 3.9+).
    '''
    logger = logging.getLogger('shot')

    # The helper executable lives beside this script; resolve it once.
    exe_name = 'pyarmor-1shot.exe' if os.name == 'nt' else 'pyarmor-1shot'
    exe_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), exe_name)

    for path, data in sequences:
        try:
            # Bytes 2..8 of the armored data name the runtime it belongs to.
            serial_number = data[2:8].decode('utf-8')
            runtime = runtimes.get(serial_number)
            if runtime is None:
                raise LookupError(
                    f'no runtime found for serial number {serial_number!r}')
            logger.info(f'Decrypting: {serial_number} ({path})')

            dest_path = os.path.join(output_dir, path) if output_dir else path
            dest_dir = os.path.dirname(dest_path)
            # dirname() is '' for a bare file name, and makedirs('') fails.
            if dest_dir:
                os.makedirs(dest_dir, exist_ok=True)

            with open(dest_path + '.1shot.raw', 'wb') as f:
                f.write(data)

            cipher_text_offset = int.from_bytes(data[28:32], 'little')
            cipher_text_length = int.from_bytes(data[32:36], 'little')
            cipher_text_end = cipher_text_offset + cipher_text_length
            # The 12-byte nonce is stored in two pieces of the header.
            nonce = data[36:40] + data[44:52]
            with open(dest_path + '.1shot.seq', 'wb') as f:
                f.write(b'\xa1' + runtime.runtime_aes_key)
                f.write(b'\xa2' + runtime.mix_str_aes_nonce())
                f.write(b'\xf0\xf0')
                f.write(data[:cipher_text_offset])
                f.write(general_aes_ctr_decrypt(
                    data[cipher_text_offset:cipher_text_end], runtime.runtime_aes_key, nonce))
                f.write(data[cipher_text_end:])

            # TODO: multi process
            sp = subprocess.run(
                [
                    exe_path,
                    dest_path + '.1shot.seq',
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=SUBPROCESS_TIMEOUT,
            )
            # The helper may echo arbitrary bytes taken from the input, so
            # never let a decoding error discard its whole output.
            stdout = sp.stdout.decode(errors='replace').splitlines()
            stderr = sp.stderr.decode(errors='replace').splitlines()
            for line in stdout:
                logger.warning(f'STDOUT {line} ({path})')
            for line in stderr:
                if line.startswith('Warning'):
                    logger.warning(f'STDERR {line} ({path})')
                else:
                    logger.error(f'STDERR {line} ({path})')
        except Exception as e:
            # Best effort: report the failure and go on with the next item.
            logger.error(f'Decrypt failed: {e} ({path})')
            continue
|
||||
|
||||
|
||||
def parse_args():
    '''
    Parse the command line of the one-shot entry script.

    Returns the argparse namespace with `directory` (positional) and the
    optional `runtime` (-r/--runtime) and `output_dir` (-o/--output-dir).
    '''
    cli = argparse.ArgumentParser(description='Pyarmor Static Unpack 1 Shot Entry')
    add_option = cli.add_argument

    add_option('directory', type=str,
               help='the "root" directory of obfuscated scripts')
    add_option('-r', '--runtime', type=str,
               help='path to pyarmor_runtime[.pyd|.so|.dylib]')
    add_option('-o', '--output-dir', type=str,
               help='save output files in another directory instead of in-place, with folder structure remain unchanged')

    return cli.parse_args()
|
||||
|
||||
|
||||
def _read_armored_bytes(file_path: str) -> bytes:
    '''
    Return the bytes literal of the first one-line `__pyarmor__(...)` call.

    Only the first line that starts with `__pyarmor__(` and ends with `)`
    is examined. Raises ValueError when no such line exists or when its
    first constant is not a plausible payload; errors from open() and
    compile() propagate to the caller.
    '''
    # Armored scripts are expected to be UTF-8; do not depend on the
    # platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not (line.startswith('__pyarmor__(') and line.rstrip().endswith(')')):
                continue
            # compile() only parses the line; nothing from it is executed.
            code = compile(line, '<str>', 'exec')
            payload = code.co_consts[0]
            # Explicit checks instead of assert, which `python -O` removes.
            if not isinstance(payload, bytes):
                raise ValueError('first constant of __pyarmor__ call is not bytes')
            if not payload.startswith(b'PY'):
                raise ValueError('armored data does not start with PY')
            if len(payload) <= 64:
                raise ValueError('armored data is too short')
            return payload
    raise ValueError('no one-line __pyarmor__(...) call found')


def main():
    '''
    Scan a directory tree for Pyarmor runtimes and armored scripts, then
    decrypt everything that was found.

    Runtimes are discovered automatically unless one is given with `-r`.
    Directories named `__pycache__` or `site-packages` are skipped, and so
    are files this tool generated earlier (`.1shot.` in the name).
    '''
    args = parse_args()
    logging.basicConfig(
        level=logging.INFO,
        format='%(levelname)-8s %(asctime)-28s %(message)s',
    )
    logger = logging.getLogger('shot')

    if args.runtime:
        specified_runtime = RuntimeInfo(args.runtime)
        runtimes = {specified_runtime.serial_number: specified_runtime}
    else:
        specified_runtime = None
        runtimes = {}

    sequences: list[tuple[str, bytes]] = []

    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)

    skipped_dirs = ('__pycache__', 'site-packages')
    # Files that merely share the `pyarmor_runtime` prefix (shortcuts,
    # disassembler databases, backups) are never parsed as runtimes.
    non_runtime_suffixes = ('.lnk', '.i64', '.idb', '.id0', '.id1',
                            '.id2', '.nam', '.til', '.bak')

    dir_path: str
    dirs: list[str]
    files: list[str]
    for dir_path, dirs, files in os.walk(args.directory, followlinks=False):
        # Prune in place so that os.walk() does not descend into them.
        dirs[:] = [d for d in dirs if d not in skipped_dirs]
        for file_name in files:
            if '.1shot.' in file_name:
                continue
            file_path = os.path.join(dir_path, file_name)
            relative_path = os.path.relpath(file_path, args.directory)

            # is pyarmor_runtime?
            if specified_runtime is None \
                    and file_name.startswith('pyarmor_runtime') \
                    and not file_name.endswith(non_runtime_suffixes):
                try:
                    new_runtime = RuntimeInfo(file_path)
                except Exception as e:
                    # Not a parsable runtime; fall through to other checks.
                    logger.debug(f'Not a runtime: {e} ({file_path})')
                else:
                    runtimes[new_runtime.serial_number] = new_runtime
                    logger.info(
                        f'Found new runtime: {new_runtime.serial_number} ({file_path})')
                    print(new_runtime)
                    # Handled; no need to read the file again below.
                    continue

            try:
                with open(file_path, 'rb') as f:
                    beacon = f.read(16 * 1024 * 1024)
            except OSError as e:
                # One unreadable entry (broken link, permission) must not
                # abort the whole scan.
                logger.warning(f'Cannot read file: {e} ({file_path})')
                continue

            # is UTF-8 source?
            # TODO: only support natural one line now
            if b'__pyarmor__(__name__, __file__,' in beacon:
                try:
                    bytes_raw = _read_armored_bytes(file_path)
                except Exception as e:
                    logger.error(f'Assume source, but {e} ({file_path})')
                else:
                    logger.info(f'Found data in source: {relative_path}')
                    sequences.append((relative_path, bytes_raw))
                continue

            # TODO: is Nuitka package?
            # TODO: is pyc or single marshalled binary?

    decrypt_process(runtimes, sequences, args.output_dir or args.directory)
|
||||
|
||||
|
||||
# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
Reference in New Issue
Block a user