Skip to content

Commit

Permalink
Introduce basics for modification detection
Browse files Browse the repository at this point in the history
As part of this work, we need to verify that files that are uploaded
to permanent are downloaded without changes.

This modification detection could also help verify completion of uploads
 and downloads.

Signed-off-by: Fon E. Noel NFEBE <fenn25.fn@gmail.com>
  • Loading branch information
nfebe committed Apr 12, 2023
1 parent c5a39d4 commit 0553bfc
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 3 deletions.
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,20 @@ To test a nest with more levels, simply paste a nested folder structure inside `

Run

`./test-download.py --remote=prod --archive-path="/archives/rclone QA 1 (0a21-0000)/My Files/" --remote-dir=nested`

Check the downloads folder in `test-tree/downloads` and ensure that the `downloads/nested` directory has a structure like the nested directory uploaded in the [nested uploads test](#nested-uploads).
`./test-download.py --remote=prod --archive-path="/archives/rclone QA 1 (0a21-0000)/My Files/" --remote-dir=misc/nested`

To verify that everything in the nested folder was downloaded correctly, run `./verify.py --nested-complete`.

### What file types and scenarios are left out?

Anything not included in the section above describing what is currently covered is by implication excluded from these tests.

## Hash verification

### Modification Detection

To verify that files that were successfully uploaded and downloaded have remained unchanged, as we would expect, run `./verify.py --succeeded`.

## Troubleshooting

- Remember that the commands are examples and some of the arguments may not apply to your specific environment.
Expand Down
119 changes: 119 additions & 0 deletions verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
import os
import sys
import hashlib
import argparse

# Directory tree that is walked to collect the downloaded copies of the files.
DOWNLOAD_MISC_DIR = "test-tree/downloads/misc"
# Directory tree that is walked to collect the original (uploaded) files.
MISC_DIR = "test-tree/misc"
# Number of bytes read from a file per iteration while hashing it.
CHUNK_SIZE = 1024

# ANSI escape sequences used to colour the messages printed to the terminal.
OKBLUE = "\033[94m"
OKGREEN = "\033[92m"
FAIL = "\033[91m"
WARNING = "\033[93m"
# Appended after a coloured message to reset the terminal text attributes.
ENDC = "\033[0m"


def hash_file(file_path, chunk_size=None):
    """Return the hexadecimal SHA-1 digest of the file at ``file_path``.

    The file is opened in binary mode and read in fixed-size chunks so that
    large files are never loaded into memory all at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. When omitted or
            ``None`` the module-level ``CHUNK_SIZE`` is used.

    Returns:
        The SHA-1 digest of the file contents as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if chunk_size < 1:
        # read(0) always returns b"", which would silently produce the digest
        # of an empty file instead of the real contents.
        raise ValueError("chunk_size must be a positive number of bytes")

    digest = hashlib.sha1()
    with open(file_path, "rb") as file:
        # iter() with a sentinel stops at the first empty read (end of file),
        # so no placeholder value or trailing empty update is needed.
        for chunk in iter(lambda: file.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def crawl_upload_and_download_paths():
    """Collect every file path found under the upload and download trees.

    Returns:
        A ``(uploaded_paths, downloaded_paths)`` tuple of lists. The first
        list holds the paths of all files found by walking ``MISC_DIR``, the
        second those found by walking ``DOWNLOAD_MISC_DIR``.
    """

    def files_under(root):
        # Flatten the os.walk() output into one list of joined file paths.
        return [
            os.path.join(folder, name)
            for folder, _, names in os.walk(root)
            for name in names
        ]

    return files_under(MISC_DIR), files_under(DOWNLOAD_MISC_DIR)


def make_file_to_harsh_maps():
    """Hash every uploaded and every downloaded file.

    Returns:
        A ``(pre_upload_hashes, post_upload_hashes)`` tuple. Each element is
        a list of ``{"path": ..., "hash": ...}`` dictionaries, one per file,
        where ``hash`` is the value returned by ``hash_file`` for ``path``.
    """
    originals, downloads = crawl_upload_and_download_paths()
    before = [{"path": item, "hash": hash_file(item)} for item in originals]
    after = [{"path": item, "hash": hash_file(item)} for item in downloads]
    return before, after


def parse_cli():
    """Build the command-line parser for the verification script.

    Returns:
        An ``argparse.ArgumentParser`` exposing three boolean switches:
        ``--misc-complete``, ``--nested-complete`` and ``--succeeded``.
    """
    cli = argparse.ArgumentParser(
        prog="verify", description="Check results of upload/download operations"
    )
    # (option string, help text) for each store_true switch, in display order.
    switches = (
        (
            "--misc-complete",
            "Verify that both the upload and download of the complete misc folder was successful",
        ),
        (
            "--nested-complete",
            "Verify that both the upload and download of the complete nested folder was successful",
        ),
        (
            "--succeeded",
            "Verify that files that were successfully uploaded where downloaded successfully",
        ),
    )
    for option, help_text in switches:
        cli.add_argument(option, action="store_true", help=help_text)

    return cli


def main():
    """Run the verification selected on the command line.

    With ``--succeeded`` every uploaded and downloaded file is hashed, and
    each downloaded file whose hash does not appear among the uploaded
    hashes is reported, followed by an overall summary. ``--misc-complete``
    and ``--nested-complete`` are accepted but currently do nothing. Without
    any switch a short notice and the parser help are printed.
    """
    parser = parse_cli()
    args = parser.parse_args()

    if args.succeeded:
        # Hash only when this check is requested; the other branches never
        # look at the hashes.
        pre_upload_hash_data, post_upload_hash_data = make_file_to_harsh_maps()
        # Build a set rather than a lazy map() iterator: a membership test on
        # an iterator consumes it, so every check after the first would run
        # against a partially or fully exhausted sequence and report files as
        # missing that are actually present.
        pre_upload_hashes = {entry["hash"] for entry in pre_upload_hash_data}

        failed_once = False
        for file_data in post_upload_hash_data:
            path = file_data["path"]
            print(f"{OKBLUE}Verifying hash for {path} ...{ENDC}")
            if file_data["hash"] not in pre_upload_hashes:
                print(f"{WARNING}The hash to the path {path} is missing!{ENDC}")
                print(
                    f"{WARNING}File has either been modified (on disk or permanent) or is missing!{ENDC}\n"
                )
                failed_once = True

        if not failed_once:
            print(f"{OKGREEN}\nVerification complete!{ENDC}\n")
            print(
                f"{OKGREEN}All downloaded files have matching hashes in pre-uploaded file hashes.{ENDC}\n"
            )
        else:
            print(
                f"{FAIL}\nVerification complete but failed! Missing hash(es) detected.\n{ENDC}"
            )
            print(
                f"{FAIL}At least once missing hash detected, check the logs above.\n{ENDC}"
            )
    elif args.misc_complete:
        # Not implemented yet.
        pass
    elif args.nested_complete:
        # Not implemented yet.
        pass
    else:
        print("Not sure what to do!\n\n")
        parser.print_help()


# Run the verification only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 comments on commit 0553bfc

Please sign in to comment.