DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world
Compare Jpg Image Files Using Histogram
// Use progressively more costly methods to determine whether two JPG files are different.
#!/usr/bin/python
import binascii
import glob
import hashlib
import md5
import os
import sys

from PIL import Image
def histogram_md5(im):
    """Return the raw MD5 digest of an image's histogram.

    The value returned by ``im.histogram()`` is rendered with ``str()`` and
    that text is hashed, so two images whose histograms render identically
    produce identical digests.

    Args:
        im: Any object exposing a ``histogram()`` method (the script passes
            images opened with ``Image.open``).

    Returns:
        The 16-byte binary MD5 digest (not hex encoded).
    """
    text = str(im.histogram())
    # hashlib supersedes the standalone ``md5`` module, which has been
    # deprecated since Python 2.5 and does not exist on Python 3.
    # Hash objects consume bytes: on Python 2 ``str`` already is bytes and is
    # passed through untouched; a text string is encoded first.
    if not isinstance(text, bytes):
        text = text.encode("utf-8")
    return hashlib.md5(text).digest()
def _report_different(path_a, path_b):
    """Print the "are different" verdict for the two paths and end the run."""
    print("%s  and  %s  are different" % (path_a, path_b))
    sys.exit()


# Compare the two image files named on the command line, cheapest check
# first: dimensions, info dict, mode, histogram digest, then every pixel.
# The script stops at the first check that tells the images apart.
#
# Every message is formatted into ONE string before printing, so the emitted
# text is the same whether ``print(...)`` is parsed as a statement or as a
# function call. The doubled spaces reproduce the script's established output.

# argv[0] is the script name, so "no file names" means fewer than 2 entries.
if len(sys.argv) < 2:
    print("No file names provided.")
    sys.exit()
# Compare by value: ``is not`` tests object identity, which is not a
# guaranteed way to compare integers.
if len(sys.argv) != 3:
    print("two files only!")
    sys.exit()

file1 = sys.argv[1]
file2 = sys.argv[2]

print("open  %s" % (file1,))
im1 = Image.open(file1)
print("open  %s" % (file2,))
im2 = Image.open(file2)

# 1. Dimensions.
print("sizes:  %s   %s" % (im1.size, im2.size))
if im1.size != im2.size:
    _report_different(file1, file2)

# 2. The info dictionaries attached to the opened images.
print("info:  %s   %s" % (im1.info, im2.info))
if im1.info != im2.info:
    _report_different(file1, file2)

# 3. Mode strings.
print("mode:  %s   %s" % (im1.mode, im2.mode))
if im1.mode != im2.mode:
    _report_different(file1, file2)

# 4. MD5 digest of each histogram.
file1_hm5 = histogram_md5(im1)
file2_hm5 = histogram_md5(im2)
# b2a_hex returns a byte string; decode so it is shown as plain hex text.
print("histogram md5:  %s   %s" % (
    binascii.b2a_hex(file1_hm5).decode("ascii"),
    binascii.b2a_hex(file2_hm5).decode("ascii"),
))
if file1_hm5 != file2_hm5:
    _report_different(file1, file2)

# A differing on-disk size is only reported, not treated as a verdict; the
# pixel comparison below still decides.
f1 = os.path.getsize(file1)
f2 = os.path.getsize(file2)
if f1 != f2:
    print("%s  and  %s  have different sizes." % (file1, file2))
    print("possibly because they have different meta data.")

# 5. Full pixel-by-pixel comparison (the expensive step). List inequality
# already covers a length mismatch, so no separate length check is needed.
print("looking at every bit of each file. this can take a while...")
imc1 = list(im1.getdata())
imc2 = list(im2.getdata())
if imc1 != imc2:
    _report_different(file1, file2)

print("%s  and  %s  contain the same image data." % (file1, file2))





