Add split_count argument to shuffle_binpack.py

This optional argument allows splitting the input binpack into multiple output binpacks while shuffling.
This commit is contained in:
Joost VandeVondele
2020-12-24 14:34:26 +01:00
committed by nodchip
parent 4f6fdca31f
commit bb6188430d

View File

@@ -25,16 +25,18 @@ def index_binpack(file):
return index return index
def copy_binpack_indexed(in_file, index, out_file): def copy_binpack_indexed(in_file, index, out_files):
print('Copying...') print('Copying...')
total_size = 0 total_size = 0
report_every = 100 report_every = 100
prev_mib = -report_every prev_mib = -report_every
nextfile = 0
for offset, size in index: for offset, size in index:
in_file.seek(offset, os.SEEK_SET) in_file.seek(offset, os.SEEK_SET)
data = in_file.read(size) data = in_file.read(size)
assert len(data) == size assert len(data) == size
out_file.write(data) out_files[nextfile].write(data)
nextfile = (nextfile + 1) % len(out_files)
total_size += size total_size += size
mib = total_size // 1024 // 1024 mib = total_size // 1024 // 1024
@@ -44,26 +46,44 @@ def copy_binpack_indexed(in_file, index, out_file):
def main():
    """Shuffle a binpack file, optionally splitting it into several outputs.

    Command line: ``python shuffle_binpack.py infile outfile [split_count]``.

    Without ``split_count`` the shuffled chunks are written to ``outfile``.
    With ``split_count`` = N, N output files are created whose names are
    derived from ``outfile`` by inserting ``_0`` .. ``_{N-1}`` before the
    extension; the shuffled chunks are then handed to
    ``copy_binpack_indexed`` together with the list of open output files.

    Nothing is written if any of the target paths already exists, or if
    ``split_count`` is not a positive integer.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from contextlib import ExitStack

    if len(sys.argv) < 3:
        print('Usage: python shuffle_binpack.py infile outfile [split_count]')
        return

    in_filename = sys.argv[1]
    basefile = sys.argv[2]

    if len(sys.argv) > 3:
        # Split the infile into split_count pieces, creating new outfile
        # names based on the provided name.
        try:
            split_count = int(sys.argv[3])
        except ValueError:
            split_count = 0
        # A count below 1 would produce an empty output list and make the
        # copy step fail when it indexes into it, so reject it up front.
        if split_count < 1:
            print('split_count must be a positive integer, got {!r}.'.format(sys.argv[3]))
            return
        base, ext = os.path.splitext(basefile)
        out_filenames = [base + "_{}".format(i) + ext for i in range(split_count)]
    else:
        out_filenames = [basefile]

    # Refuse to overwrite anything: check every target before opening any.
    for out_filename in out_filenames:
        if Path(out_filename).exists():
            print('Output path {} already exists. Please specify a path to a file that does not exist.'.format(out_filename))
            return

    print(out_filenames)

    # ExitStack closes the input and every output that was opened, even if
    # indexing, opening a later output, or copying raises.
    with ExitStack() as stack:
        in_file = stack.enter_context(open(in_filename, 'rb'))
        index = index_binpack(in_file)

        print('Shuffling...')
        random.shuffle(index)

        # Outputs are opened only after indexing succeeded, as before, so a
        # bad input file does not leave empty output files behind.
        out_files = [
            stack.enter_context(open(out_filename, 'wb'))
            for out_filename in out_filenames
        ]
        copy_binpack_indexed(in_file, index, out_files)
main() main()