DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.
Python * To Unicode
// description of your code here
Detects the encoding by brute force (trying each candidate encoding in turn) and returns Unicode.
# Candidate encodings, tried in list order; the first one that can represent
# the input without loss is used.  Order matters:
#   * "ascii" is the strictest candidate, so it goes first.
#   * "utf-8" must come before every single-byte code page.  Those code pages
#     accept almost any byte sequence, so valid UTF-8 input would otherwise be
#     mis-decoded as one of them and come back as mojibake.
#   * "latin-1" (alias "iso-8859-1") maps every possible byte value, so it
#     always succeeds; entries listed after it can never be selected.
ENCODINGS = [
    "ascii",
    "utf-8",
    "cp1250",
    "cp1251",
    "cp1252",
    "latin-1",
    "iso-8859-1",
    "iso-8859-2",
    "iso-8859-4",
    "iso-8859-5",
]
def getUNICODE(page_data="", f=""):
    """Decode byte data to Unicode by trying each encoding in ENCODINGS.

    The candidates are tried in list order and the first one that decodes
    the input strictly (no invalid bytes) wins.

    Args:
        page_data: The data to convert.  Text that is already Unicode is
            returned unchanged; any object with a ``decode`` method (byte
            strings, bytearrays) is decoded.
        f: Unused; kept so existing callers that pass it keep working.

    Returns:
        The decoded Unicode text.  If ``page_data`` cannot be decoded at all
        (it has no ``decode`` method, or no candidate encoding accepts it),
        the original object is returned unchanged.
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so this
    # check works on both without naming either type directly.
    if isinstance(page_data, type(u"")):
        return page_data

    decode = getattr(page_data, "decode", None)
    if not callable(decode):
        # Not byte-like (None, numbers, ...): best effort, hand it back.
        return page_data

    for enc in ENCODINGS:
        try:
            # Strict decoding raises on the first byte that is invalid for
            # `enc`, so a successful return means nothing was dropped.
            return decode(enc)
        except (UnicodeError, LookupError):
            # Wrong guess, or the codec is unavailable: try the next one.
            continue
    return page_data





