251 lines
10 KiBLFS
Python
251 lines
10 KiBLFS
Python
import zstandard as zstd
|
|
|
|
from ..blk.FileInfo import FileType
|
|
from ..blk.Block import Block
|
|
from ..blk.Chunk import ChunkParser, Chunk
|
|
from ..blk.ParamParser import BLKTypes
|
|
from ..DataHandler import DataHandler
|
|
|
|
|
|
class BlkDecoder:
    """
    a blk parser

    inputs:
        dat: the data to be parsed
        offset: how far along into the data the blk starts
        name_map: an optional parameter for blks that have a name map, see FileInfo.py for more info
        zstd_dict: an optional parameter for blks that have a zstd dict, see FileInfo.py for more info

    The payload is read in this order: number of names, (for non-slim blks) the
    name map size and the NUL-separated name map, number of blocks, number of
    params, params data size, params data, one 8 byte chunk per param and
    finally one header per block. All counts and sizes are ULEB128 encoded.

    After construction ``self.parent`` is the first block that was read, with
    every other block attached below it; ``to_dict()`` converts that tree.
    """

    def __init__(self, dat, offset=0, name_map: list[bytearray] = None, zstd_dict=None):
        self.data = None
        self.blkType = FileType(dat[offset])  # gets blk type, the first byte
        self.data = self._open_payload(dat, offset, zstd_dict)

        self.names_in_name_map = self.decode_uleb128()  # gets the number of names in the name map
        self.names = self._read_names(name_map)

        self.num_of_blocks = self.decode_uleb128()
        self.num_of_params = self.decode_uleb128()
        self.params_data_size = self.decode_uleb128()
        self.params_data = self.data.fetch(self.params_data_size)  # used later on by BLKTypes

        chunks = self._read_chunks()
        blocks = [self._read_block() for _ in range(self.num_of_blocks)]

        # Params are stored in block order, so every block takes the next
        # `param_count` chunks from the flat chunk list.
        result_ptr = 0
        for block in blocks:
            field_count = block.param_count
            for i in range(field_count):
                block.add_field(chunks[result_ptr + i])
            result_ptr += field_count

        # The first block is the top of the tree; an empty block list raises IndexError here.
        self.parent = blocks[0]
        self.from_blocks_with_parent(self.parent, blocks)

    def _open_payload(self, dat, offset, zstd_dict):
        """Return a DataHandler positioned on the (decompressed) blk payload."""
        if not self.blkType.is_zstd():
            return DataHandler(dat, offset=offset + 1, read_from_start=False)

        # The compressed frame starts right after the type byte, which sits at
        # dat[offset] (not necessarily dat[0]).
        compressed = dat[offset + 1:]
        if self.blkType.needs_dict():
            if zstd_dict is None:
                # Only reported: decompression is still attempted without a dictionary.
                print("BAD DICT")
            raw = zstd.ZstdDecompressor(zstd_dict).decompress(compressed)
        else:
            try:
                raw = zstd.decompress(compressed)
            except zstd.ZstdError:
                # only done because some zstd data in VROMFS can be in streams instead of standard format
                with zstd.ZstdDecompressor().stream_reader(compressed) as reader:
                    raw = reader.read()
        # `raw` is a fresh buffer that begins with the payload, so the offset
        # into `dat` must not be applied to it again.
        return DataHandler(raw, offset=0, read_from_start=False)

    def _read_names(self, name_map):
        """Return the list of names, from `name_map` (slim blks) or from the payload."""
        if self.blkType.is_slim():
            if name_map is None:
                # Only reported: iterating None below raises TypeError.
                print("BAD NAME MAP")
            names = []
            for name in name_map:
                try:
                    names.append(name.decode("utf-8"))
                except UnicodeDecodeError:
                    # Keep the entry (indices must stay aligned) but mark it as broken.
                    names.append("BADBADBAD" + name.decode("utf-8", errors="ignore"))
            return names

        self.name_map_size = self.decode_uleb128()  # gets the size of the name map
        names = [x.decode("utf-8") for x in self.data.fetch(self.name_map_size - 1).split(b"\x00")]
        self.data.advance(1)  # it only fetches size - 1 for speed to reduce slicing
        if len(names) != self.names_in_name_map:
            print("RED ALERT")
        return names

    def _read_chunks(self):
        """Parse one 8 byte chunk per param and return the parsed chunks in order."""
        parser = ChunkParser(self.names, BLKTypes(self.names, self.params_data))
        return [parser.parse(self.data.fetch(8)) for _ in range(self.num_of_params)]

    def _read_block(self):
        """Read one block header (name id, param count, block count, first block id)."""
        name_id = self.decode_uleb128()
        param_count = self.decode_uleb128()
        block_count = self.decode_uleb128()
        # The first child index is only stored for blocks that have children.
        first_block_id = self.decode_uleb128() if block_count > 0 else -1
        return Block(self.block_id_to_name(name_id), param_count, block_count, first_block_id)

    def to_dict(self):
        """Return ``self.parent.to_dict()``, i.e. the dict form of the block tree."""
        return self.parent.to_dict()

    def decode_uleb128(self):
        """Decodes a ULEB128 encoded value."""
        value = 0
        shift = 0
        while True:
            byte = self.data.fetch(1)[0]
            value |= (byte & 0x7f) << shift  # low 7 bits carry data, least significant group first
            if not (byte & 0x80):  # a clear high bit marks the last byte
                break
            shift += 7
        return value

    def block_id_to_name(self, block_id):
        """Return "root" for id 0, otherwise the name stored at index ``block_id - 1``."""
        if block_id == 0:
            return "root"
        return self.names[block_id - 1]

    def from_blocks_with_parent(self, parent, blocks):
        """
        Attach the children of `parent` recursively.

        The children of a block are the `blocks_count` consecutive entries of
        `blocks` starting at index `first_block_id`.
        """
        for i in range(parent.blocks_count):
            child = blocks[i + parent.first_block_id]
            parent.children.append(child)
            self.from_blocks_with_parent(child, blocks)
|
|
|
|
|
|
class BlkBytes:
    """
    A class that acts like BlkDecoder without all the parsing, simply used to get all the bytes from a BLK

    inputs:
        dat: the data the blk is read from
        offset: how far along into the data the blk starts
        name_map: only checked for presence on slim blks, its content is not used here
        zstd_dict: the zstd dict for blks that need one

    The payload is walked field by field exactly as far as the blk reaches and
    every byte that is consumed is appended to ``self.bytes``. The result is
    the type byte followed by the (decompressed) payload of this blk only.
    """

    def __init__(self, dat, offset=0, name_map: list[bytearray] = None, zstd_dict=None):
        self.data = None
        self.bytes = bytearray()
        self.blkType = FileType(dat[offset])  # gets blk type, the first byte
        self.bytes += bytearray([dat[offset]])
        self.data = self._open_payload(dat, offset, zstd_dict)

        self.names_in_name_map = self._copy_uleb128()  # gets the number of names in the name map
        if self.blkType.is_slim():
            # Slim blks carry no name map of their own, so nothing is copied here.
            if name_map is None:
                print("BAD NAME MAP")
        else:
            self.name_map_size = self._copy_uleb128()  # gets the size of the name map
            self._copy(self.name_map_size)

        self.num_of_blocks = self._copy_uleb128()
        self.num_of_params = self._copy_uleb128()
        self.params_data_size = self._copy_uleb128()
        self._copy(self.params_data_size)
        self._copy(self.num_of_params * 8)  # every param is one 8 byte chunk

        for _ in range(self.num_of_blocks):
            self._copy_uleb128()  # name id
            self._copy_uleb128()  # param count
            block_count = self._copy_uleb128()
            if block_count > 0:
                # The first child index is only stored for blocks that have children.
                self._copy_uleb128()

    def _open_payload(self, dat, offset, zstd_dict):
        """Return a DataHandler positioned on the (decompressed) blk payload."""
        if not self.blkType.is_zstd():
            return DataHandler(dat, offset=offset + 1, read_from_start=False)

        # The compressed frame starts right after the type byte, which sits at
        # dat[offset] (not necessarily dat[0]).
        compressed = dat[offset + 1:]
        if self.blkType.needs_dict():
            if zstd_dict is None:
                # Only reported: decompression is still attempted without a dictionary.
                print("BAD DICT")
            raw = zstd.ZstdDecompressor(zstd_dict).decompress(compressed)
        else:
            try:
                raw = zstd.decompress(compressed)
            except zstd.ZstdError:
                # only done because some zstd data in VROMFS can be in streams instead of standard format
                with zstd.ZstdDecompressor().stream_reader(compressed) as reader:
                    raw = reader.read()
        # `raw` is a fresh buffer that begins with the payload, so the offset
        # into `dat` must not be applied to it again.
        return DataHandler(raw, offset=0, read_from_start=False)

    def _copy(self, size):
        """Fetch the next `size` bytes of the payload and append them to ``self.bytes``."""
        self.bytes += self.data.fetch(size)

    def _copy_uleb128(self):
        """Decode one ULEB128 value, append its raw bytes to ``self.bytes`` and return the value."""
        value, raw = self.decode_uleb128()
        self.bytes += raw
        return value

    def decode_uleb128(self):
        """
        Decodes a ULEB128 encoded value.

        Returns a tuple ``(value, raw)`` where ``raw`` is a bytearray holding
        the bytes the value was encoded in.
        """
        value = 0
        shift = 0
        bytes_ = bytearray()
        while True:
            byte = self.data.fetch(1)
            bytes_ += byte
            byte = byte[0]
            value |= (byte & 0x7f) << shift  # low 7 bits carry data, least significant group first
            if not (byte & 0x80):  # a clear high bit marks the last byte
                break
            shift += 7
        return value, bytes_
|
|
|
|
|