Simple hash generation and parsing
This is simple Python script to:
- Generate hash list for group of images (about 6-15k)
- Transform it to .xml document
# import os to execute linux commands
import os
# import eml.etree for xml manipulations
import xml.etree.cElementTree as ET
# import minidom to prettify xml
import xml.dom.minidom
# import datetime to get current time
from datetime import datetime
startTime = datetime.now() # varible that store datetime of script start
# create file with hashes using sha256
# os.system("sha256sum ~/somedir/*.{jpg,png,gif,jpeg} > hash_list.txt")
# create file with hashes using md5
os.system("md5sum ~/somedir/*.{jpg,png,gif,jpeg} > hash_list.txt")
#show generation (md5sum/sha256sum) work time
s = datetime.now() - startTime
print 'first part took about: %s' % s
root = ET.Element("root")
doc = ET.SubElement(root, "doc")
for line in open('hash_list.txt'): # create objects for hashes and names in every line
current_group = ET.SubElement(doc, 'img_item', )
current_hash = line.split(' ', 1)[0]
current_path = line.split(' ', 1)[1]
current_name = current_path.rsplit('/',1)[-1].rstrip()
ET.SubElement(current_group, "hash", value = current_hash).text = current_hash
ET.SubElement(current_group, "img_name", value = current_name).text = current_name
# convert tree to string
tree = ET.tostring(root)
xml = xml.dom.minidom.parseString(tree)
pretty_xml_as_string = xml.toprettyxml() # and format it!
xml_file = open("hash.xml", "w+")
xml_file.write(pretty_xml_as_string)
xml_file.close
# show script work time
s = datetime.now() - startTime
print 'it all took about: %s' % s