diff --git a/post_processing.py b/post_processing.py index d95bcfe54fb36654c1b0c955f5ebe27df8a4bf8e..a179abb1508283f13e7bccd792f40162497cc4c8 100755 --- a/post_processing.py +++ b/post_processing.py @@ -102,18 +102,21 @@ def replace_files(uncompressed_dir_path, original_files_dir_path): # directories already exists pass - if "__" in header_line and "/" in header_line: # means the file is a split file that needs to be merged with the corresponding other split files - subfile_number_str, subfile_total_str = header_line.split("__")[1].split("/") # get the number of splits and the split index for this subfile - subfile_number, subfile_total = int(subfile_number_str), int(subfile_total_str) # warning, subfile count starts from 1 - - # get the list of subfiles lines for the same file, or init the list with size = number of subfiles - split_files_dict[original_file_path] = split_files_dict.get(original_file_path, ["" for i in range(subfile_total)]) - - if split_files_dict[original_file_path][subfile_number-1] != "": # use subfile_number-1 because it starts from 1 and not 0 - print("warning post processing (replace_files) : a subfile with the same number already exists",filename,original_file_path, subfile_number) - else: - # save the bytes of this subfile - split_files_dict[original_file_path][subfile_number-1] = b"".join(small_file_bytes_lines[1:]) + if "__" in header_line: # means the file is a split file that needs to be merged with the corresponding other split files + # should be like "path/basename__1/3" + header_counter = header_line.split("__")[-1] + if "/" in header_counter: # if the '__' was in the basename of the file, it's not for the counter + subfile_number_str, subfile_total_str = header_counter.split("/") # get the number of splits and the split index for this subfile + subfile_number, subfile_total = int(subfile_number_str), int(subfile_total_str) # warning, subfile count starts from 1 + + # get the list of subfiles lines for the same file, or init the list with size = number of subfiles + split_files_dict[original_file_path] = split_files_dict.get(original_file_path, ["" for i in range(subfile_total)]) + + if split_files_dict[original_file_path][subfile_number-1] != "": # use subfile_number-1 because it starts from 1 and not 0 + print("warning post processing (replace_files) : a subfile with the same number already exists",filename,original_file_path, subfile_number) + else: + # save the bytes of this subfile + split_files_dict[original_file_path][subfile_number-1] = b"".join(small_file_bytes_lines[1:]) else: # write the content without the header line in the file