For Windows, there isn't a built-in md5sum tool, so you've got to be a little more creative (or you could just use HashCheck, but let's be adventurous). I wrote a Python script that will do it (it will also run on Linux, but that seems kind of pointless given the built-in option):
Call the script from the same directory as all of the .md5 files and data files (if you don't want to check every .md5 file, just change the md5_glob variable).
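For example, a .md5 file named data.bin.md5 would be expected to contain a single line like the following (the file name is made up, and the hash here is just the MD5 of empty input):

d41d8cd98f00b204e9800998ecf8427e  data.bin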
import hashlib
from glob import iglob

# Assumes that the directory contains text files with md5 sums named *.md5,
# and that the first line of each such file contains an md5 hash, followed by
# whitespace, followed by the name of another file.
# That other file should also be in the directory.

md5_glob = '*.md5'

def md5_for_file(f, block_size=2**22):
    # Source: http://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python,
    # the answer from Lars Wirzenius
    md5 = hashlib.md5()
    while True:
        data = f.read(block_size)
        if not data:
            break
        md5.update(data)
    return md5.hexdigest()  # changed from the example: return the hex digest

def read_md5_from_file(filename):
    # Reads the first line of filename and returns the (hash, data file name) pair.
    with open(filename) as infile:
        (file_hash, name) = infile.readline().split()
    return (file_hash, name)

total_comparisons = 0
good_comparisons = 0
for md5_file_name in iglob(md5_glob):
    (reference_hash, data_file_name) = read_md5_from_file(md5_file_name)
    try:
        data_file = open(data_file_name, 'rb')
    except IOError:
        print('Warning: cannot open file ' + data_file_name)
        data_file = None
    if data_file is not None:
        total_comparisons += 1
        data_file_hash = md5_for_file(data_file)
        data_file.close()
        if data_file_hash == reference_hash:
            good_comparisons += 1
        else:
            print("hashes don't match for file " + data_file_name)
print("Total files examined: " + str(total_comparisons))
print("Files with good md5 sum: " + str(good_comparisons))