Of course, I looked for a moment at these "looping without a loop" tests and could have left them as in the original:
# -*- coding: latin1 -*-
def validateEmail(a):
    """Return True if ``a`` looks like ``name[.name...]@name[.name...]``.

    Only alphabetic characters may appear between separators, the only
    separators allowed are dots plus exactly one ``@``, and every part
    (including the one after the last separator) must be at least two
    characters long.

    a -- the candidate address as a string.
    """
    # Every non-letter character is treated as a separator.
    sep = [x for x in a if not x.isalpha()]
    # With the dots on both sides stripped, a single '@' must remain:
    # the separators must have the form ..@....
    if ''.join(sep).strip('.') != '@':
        return False
    end = a
    for s in sep:
        # Peel off the text in front of each separator in turn.
        part, _, end = end.partition(s)
        if len(part) < 2:
            return False
    # The text after the last separator must satisfy the same minimum
    # length; without this check "name@host." would be accepted.
    return len(end) > 1
if __name__ == '__main__':
    # Demo: list the sample addresses that validateEmail accepts.
    emails = ["test.@web.com", "test+john@web.museum", "test+john@web.m",
              "a@n.dk", "and.bun@webben.de", "marjaliisa.hamalainen@hel.fi",
              "marja-liisa.hämäläinen@hel.fi", "marjaliisa.hämäläinen@hel.fi"]
    # Single-argument print(...) behaves the same as a print statement
    # and as the print function, so the demo runs on Python 2 and 3.
    print("Valid emails are:")
    for email in filter(validateEmail, emails):
        print('\t' + email)
    print("Non-ascii letters are nowadays allowed also in names!")
""" Output:
Valid emails are:
and.bun@webben.de
marjaliisa.hamalainen@hel.fi
marjaliisa.hämäläinen@hel.fi
Non-ascii letters are nowadays allowed also in names!
"""
pyTony
pyMod
6,310 posts since Apr 2010
Reputation Points: 879
Solved Threads: 987
Skill Endorsements: 26
OK, my code has a bug: before returning from the function, it does not check that the last part, which is left in the end variable, has a valid length. Luckily, because I reused the logic for another check, I noticed the bug while debugging it. Here is the correction:
# -*- coding: latin1 -*-
import re
def validateEmail(a):
    """Return True if ``a`` looks like ``name[.name...]@name[.name...]``.

    Only alphabetic characters may appear between separators, the only
    separators allowed are dots plus exactly one ``@``, and every part
    (including the one after the last separator) must be at least two
    characters long.

    a -- the candidate address as a string.
    """
    # Every non-letter character is treated as a separator.
    sep = [x for x in a if not x.isalpha()]
    # With the dots on both sides stripped, a single '@' must remain:
    # the separators must have the form ..@....
    if ''.join(sep).strip('.') != '@':
        return False
    end = a
    for s in sep:
        # Peel off the text in front of each separator in turn; the
        # loop variable itself is left untouched by the unpacking.
        part, _, end = end.partition(s)
        if len(part) < 2:
            return False
    # What remains after the last separator must be long enough too.
    return len(end) > 1
def emailval(address):
    """Return True if the start of ``address`` matches the posted regexp.

    Kept to demonstrate the regular expression under discussion; it uses
    re.match, so only the beginning of ``address`` has to match.

    address -- the candidate address as a string.
    """
    # Raw string: same pattern text, but no invalid-escape warnings for
    # the backslash sequences.
    pattern = r"[\.\w]{2,}[@]\w+[.]\w+"
    return bool(re.match(pattern, address))
if __name__ == '__main__':
    # Demo: compare the hand-written check with the regexp-based one.
    emails = ["test.@web.com", "test+john@web.museum", "test+john@web.m",
              "a@n.dk", "and.bun@webben.de", "marjaliisa.hämäläinen@hel.fi",
              "marja-liisa.hämäläinen@hel.fi", "marjaliisah@hel.", 'tony@localhost']
    # Single-argument print(...) behaves the same as a print statement
    # and as the print function, so the demo runs on Python 2 and 3.
    print("Valid emails are:")
    for email in filter(validateEmail, emails):
        print('\t' + email)
    print("Regexp gives wrong answer:")
    for email in filter(emailval, emails):
        print('\t' + email)
"""
Valid emails are:
and.bun@webben.de
marjaliisa.hämäläinen@hel.fi
tony@localhost
Regexp gives wrong answer:
test.@web.com
and.bun@webben.de
"""
Here is also confirmation that the posted regexp is even more wrong than the previous check.
pyTony
pyMod
6,310 posts since Apr 2010
Reputation Points: 879
Solved Threads: 987
Skill Endorsements: 26
Update with better style than this newbie used, and with the match changed to be similar to this better regular expression, which is a slightly restricted version of the one from
http://www.regular-expressions.info/email.html (no test.@web.com accepted)
Here you see some examples that standard would pass:
http://en.wikipedia.org/wiki/Email_address#Valid_email_addresses
# -*- coding: latin1 -*-
import re
def validate_email(address):
    """Validate ``address`` without a regular expression.

    Hand-written, letters-only counterpart of a regexp-based check:
    ASCII letters separated by dots, exactly one ``@``, at least one dot
    after the ``@``, and a final part of 2 to ``max_domain`` characters.

    address -- the candidate address as a string.
    Returns True or False.
    """
    # To not allow single-letter parts, increase len_limit to 2 or more.
    len_limit, max_domain = 1, 4
    # Separators are all non-letters plus every non-ASCII character
    # (code points above 127), so only ASCII letters count as content.
    sep = [code for code in address if not code.isalpha() or ord(code) > 127]
    # The joined separators must have the form ..@.... and there must be
    # a dot after the '@' (the last separator may not be the '@').
    if ''.join(sep).strip('.') != '@' or sep[-1] == '@':
        return False
    end = address
    for s in sep:
        # Peel off the text in front of each separator in turn.
        part, _, end = end.partition(s)
        if len(part) < len_limit:
            return False
    # The final part needs 2..max_domain characters.
    return max_domain >= len(end) > 1
# Restricted variant of the pattern from
# http://www.regular-expressions.info/email.html: the local part may
# not end in a dot. Compiled once instead of on every call.
_EMAIL_RE = re.compile(
    r"\b[a-zA-Z0-9._%+-]*[a-zA-Z0-9_%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b")


def email_validate_re(address):
    """Match ``address`` against the e-mail regular expression.

    address -- the candidate address as a string.
    Returns the match object when the start of ``address`` matches,
    otherwise None, so the result can be used directly as a truth value.
    """
    return _EMAIL_RE.match(address)
if __name__ == '__main__':
    # Demo: run both validators over the same sample addresses and print
    # the accepted ones, tab-indented, under a heading per validator.
    emails = ["test.@web.com", "test@com", "test+john@web.museum", "test+john@web.m",
              "address@n.dk", "and.bun@webben.de", "marjaliisa.hamalainen@hel.fi",
              "marja-liisa.hämäläinen@hel.fi", "marjaliisah@hel.", 'tony@veijalainen.localhost']
    reports = (
        ("Valid emails by my function are:", validate_email),
        ("\nRegexp answer:", email_validate_re),
    )
    for heading, accepts in reports:
        print(heading)
        accepted = [candidate for candidate in emails if accepts(candidate)]
        print('\t' + '\n\t'.join(accepted))
"""
Valid emails by my function are:
address@n.dk
and.bun@webben.de
marjaliisa.hamalainen@hel.fi
Regexp answer:
address@n.dk
and.bun@webben.de
marjaliisa.hamalainen@hel.fi
"""
pyTony
pyMod
6,310 posts since Apr 2010
Reputation Points: 879
Solved Threads: 987
Skill Endorsements: 26
pyTony
pyMod
6,310 posts since Apr 2010
Reputation Points: 879
Solved Threads: 987
Skill Endorsements: 26
You must then relax the limitation:
def validate_email(address):
    """Validate ``address`` without a regular expression.

    Relaxed letters-only check: ASCII letters (plus the characters in
    ``accept_username``) separated by dots, exactly one ``@``, at least
    one dot after the ``@``, and a final part of 2 to ``max_domain``
    characters.

    address -- the candidate address as a string.
    Returns True or False.
    """
    # To not allow single-letter parts, increase len_limit to 2 or more.
    len_limit, max_domain = 1, 4
    # Extra characters meant for the username. NOTE: the scan below does
    # not distinguish the two sides of the '@', so they are tolerated
    # anywhere in the address.
    accept_username = '_-'
    # Separators are all non-letters outside accept_username, plus every
    # non-ASCII character (code points above 127).
    sep = [code for code in address
           if (not code.isalpha() and code not in accept_username)
           or ord(code) > 127]
    # The joined separators must have the form ..@.... and there must be
    # a dot after the '@' (the last separator may not be the '@').
    if ''.join(sep).strip('.') != '@' or sep[-1] == '@':
        return False
    end = address
    for s in sep:
        # Peel off the text in front of each separator in turn.
        part, _, end = end.partition(s)
        if len(part) < len_limit:
            return False
    # The final part needs 2..max_domain characters.
    return max_domain >= len(end) > 1
pyTony
pyMod
6,310 posts since Apr 2010
Reputation Points: 879
Solved Threads: 987
Skill Endorsements: 26