intermediate checkin

This commit is contained in:
2020-01-22 19:24:07 +01:00
parent b75a865a7c
commit 304f0d3eda
5 changed files with 136 additions and 3 deletions

70
cmp_by_hash.py Normal file
View File

@@ -0,0 +1,70 @@
#!python3
# Created 18 Sep 2019
# @author: andreas
"""Compare two directory trees of .jpg files by SHA-256 content hash."""
from argparse import ArgumentParser
import logging
import sys
import os
import hashlib
from pathlib import Path
import datetime as dt
import pickle
import shutil
# Root logger (getLogger() is called without a name); used by cli() for
# its start/done messages.
LOG = logging.getLogger()
# NOTE(review): PROXY_DIR is not referenced anywhere in the visible code --
# confirm whether it is still needed.
PROXY_DIR = "."
def cli(argv=None):
    """Command line interface: list .jpg files whose content is in only one tree.

    Builds a content-hash index of ``--src`` and of ``--dest`` (via
    ``build_hashdict``) and prints the path of every file whose hash occurs
    in exactly one of the two indexes: first the files found only under
    ``--src``, then the files found only under ``--dest``. Each group is
    printed in sorted path order so repeated runs give the same output.

    Args:
        argv: Full argument vector, program name at index 0. Defaults to
            ``sys.argv`` when ``None``.

    Returns:
        0 on success; the ``__main__`` guard uses it as the exit status.

    Raises:
        SystemExit: Raised by argparse when ``--src`` or ``--dest`` is
            missing or the arguments cannot be parsed.
    """
    if argv is None:
        argv = sys.argv
    LOG.info("%s %s", os.path.basename(argv[0]), " ".join(argv[1:]))

    parser = ArgumentParser(description="Module Template")
    # Both directories are mandatory: without them Path(None) below would
    # fail with a bare TypeError instead of a usage message.
    parser.add_argument("--src", required=True)
    parser.add_argument("--dest", required=True)
    args = parser.parse_args(argv[1:])

    # Echo the effective arguments.
    for k, v in vars(args).items():
        print(k, v)

    srcd = build_hashdict(Path(args.src))
    destd = build_hashdict(Path(args.dest))

    # Dict key views support set difference, so the hashes unique to each
    # side can be taken directly.
    only_in_src = srcd.keys() - destd.keys()
    only_in_dest = destd.keys() - srcd.keys()

    for path in sorted(srcd[h] for h in only_in_src):
        print(path)
    for path in sorted(destd[h] for h in only_in_dest):
        print(path)

    LOG.info("done")
    return 0
def build_hashdict(base):
    """Index the ``.jpg`` files below *base* by content hash.

    Args:
        base: ``pathlib.Path`` of the directory to scan recursively with
            the pattern ``**/*.jpg``.

    Returns:
        dict mapping each file's hash (as returned by ``file_hash``) to the
        path of a file with that content. Files with identical content
        share a single entry; the path kept is the last one the glob yields.
    """
    files_by_hash = {}
    for fp in base.glob("**/*.jpg"):
        # The glob also yields directories (and dangling symlinks) whose
        # name ends in ".jpg"; opening those would raise, so only regular
        # files are hashed.
        if not fp.is_file():
            continue
        files_by_hash[file_hash(fp)] = fp
    return files_by_hash
def file_hash(filepath, chunk_size=1 << 20):
    """Return the raw SHA-256 digest of a file's content.

    The file is read in chunks so memory use stays bounded regardless of
    the file size; the digest is identical to hashing the content at once.

    Args:
        filepath: ``pathlib.Path`` (or any object with a compatible
            ``open("rb")``) of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The 32-byte digest as ``bytes``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    h = hashlib.sha256()
    with filepath.open("rb") as fh:
        # iter() with a sentinel stops at the empty bytes returned at EOF.
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            h.update(chunk)
    return h.digest()
if __name__ == "__main__":
    # Script entry point: compact timestamps (time of day plus
    # milliseconds, no date) for every log record.
    logging.Formatter.default_time_format = '%H:%M:%S'
    logging.Formatter.default_msec_format = '%s.%03d'
    _LOG_FORMAT = '%(asctime)s [%(process)i] %(levelname).4s %(module)s.%(funcName)s: %(message)s'
    logging.basicConfig(format=_LOG_FORMAT, level=logging.DEBUG)
    # Run the command line interface and hand its return value to the shell.
    sys.exit(cli())