2

When using emails as usernames you want them to be unique over your table, but this can be a problem if you consider a GMail account, because of their username policy. They allow:

  • dots
  • digits
  • letters
  • plus addressing text by using the + sign, e.g. orange+juice@gmail.com
  • length between 6 and 30 characters, excluding dots and the appended part

But when resolving the username they do not consider:

  • dots
  • different capitalization
  • plus addressing

So, when you write to:

UserName@gmail.com
u.sername@gmail.com
user.name+forum@gmail.com
.u.s.e.r.n.a.m.e.@gmail.com
u.serName+doh@googlemail.com

You will always match the same account:

username@gmail.com

This class helps to determine whether a submitted email is a valid GMail address and to get its basic version. The parseMail() method only validates emails from other providers, so you can submit an array of emails and get back the basic versions of the GMail addresses together with all the other emails. If you submit an array to the isGmail() method instead, you get back only the valid GMail accounts, in their basic version.

I'm applying lowercase to all the emails: RFC 2821 allows case-sensitive local parts, but relying on this is discouraged. Some examples:

<?php

    # Sample input: plain entries, one entry with a string key and a nested
    # array. Array input is walked recursively, so only the leaf values matter.
    $list = array(
        'user.name@anemail.com',
        'username+acme@gmail.com',
        'email' => 'another@gmail.com',

        array(
            'test@ymail.com',
            'will+fail@gmail.com',
            'this032@googlemail.com',
            '"valid@test"@email.com',
            'Awesome@yahoo.com'
        ),

        'someone+doh@gmail.com',
        'AnotherUser+focus@gmail.com',
        'simple@gmail.com'
    );

    $gm = new GMailParser;

    # testing strings: isGmail() answers with a boolean
    var_dump($gm->isGmail('user.name@amail.com'));
    var_dump($gm->isGmail('user.name@gmail.com'));
    /*

    bool(false)
    bool(true)

    */

    # testing strings: parseMail() answers with the (basic) address
    var_dump($gm->parseMail('user.name@amail.com'));
    var_dump($gm->parseMail('user.name@gmail.com'));
    /*

    string(19) "user.name@amail.com"
    string(18) "username@gmail.com"

    */


    # testing arrays: only the valid GMail accounts, in their basic version
    print_r($gm->isGmail($list));
    /*

    Array
    (
        [0] => username@gmail.com
        [1] => another@gmail.com
        [2] => this032@gmail.com
        [3] => someone@gmail.com
        [4] => anotheruser@gmail.com
        [5] => simple@gmail.com
    )

    */


    # testing arrays: basic GMail versions plus every other valid email
    print_r($gm->parseMail($list));
    /*

    Array
    (
        [0] => user.name@anemail.com
        [1] => username@gmail.com
        [2] => another@gmail.com
        [3] => test@ymail.com
        [4] => this032@googlemail.com
        [5] => "valid@test"@email.com
        [6] => awesome@yahoo.com
        [7] => someone@gmail.com
        [8] => anotheruser@gmail.com
        [9] => simple@gmail.com
    )

    NOTE: entry [4] keeps the googlemail.com domain only when _parts() lets
    the non-strict branch overwrite the GMail result; with a _parts() that
    guards against this, entry [4] becomes this032@gmail.com.

    */

I use this to save the basic version of the submitted GMail address. I also tend to save the version written by the user to maintain the plus addressing text, which is used to create filters in the GMail application:

username+forum@gmail.com

This way I can allow the user to log in with any version of their email, since it will always match the same account, while I can still send the email to the original version.

Edited by cereal

Votes + Comments
Good work c!
<?php

class GMailParser {

	/**
	*	GMail domains to check in search of aliases.
	*	Kept in lowercase: incoming domains are lowercased before the lookup.
	*
	*	@var array
	*/
	private $domains = array('gmail.com', 'googlemail.com');


	/**
	*	Domain assigned to the clean (basic) version of a GMail address,
	*	whichever of the GMail domains was submitted.
	*
	*	@var string
	*/
	private $default = 'gmail.com';


	/**
	* 	Verify GMail addresses.
	*
	*	if $mail is string it returns boolean: true when the domain is one
	*	of the GMail domains and the local part, once dots and the plus
	*	addressing text are removed, is made of 6 to 30 letters or digits;
	*	if $mail is array it returns only valid GMail addresses, in their
	*	basic version, without duplicates and reindexed from 0.
	*
	*	@param mixed $mail
	*	@return mixed
	*/
	public function isGmail($mail)
	{
		if(is_array($mail))
			return $this->_parseList($mail, true);

		return $this->_parts($mail, true);
	}


	/**
	* 	Check an email address or a list of emails,
	* 	if it is GMail then return the basic address.
	*
	*	A string is first checked with FILTER_VALIDATE_EMAIL: the method
	*	returns false when it is empty or invalid, or when it is a GMail
	*	address with an invalid local part. Addresses of other providers
	*	are returned trimmed and lowercased.
	*	An array returns the list of the results that did not fail,
	*	without duplicates and reindexed from 0.
	*
	*	@param mixed $mail 
	*	@return mixed
	*/
	public function parseMail($mail = '')
	{
		if(is_array($mail))
			return $this->_parseList($mail);

		if(empty($mail) || filter_var($mail, FILTER_VALIDATE_EMAIL) === false)
			return false;

		return $this->_parts($mail);
	}


	/**
	* 	Loop arrays.
	*
	*	Every leaf of $array (nested arrays included) is parsed: with
	*	parseMail() when $bool is false, as a strict GMail check returning
	*	the basic address when $bool is true.
	*
	*	@param array $array
	*	@param boolean $bool
	*	@return array
	*/
	private function _parseList($array, $bool = false)
	{
		$list = new RecursiveIteratorIterator(
			new RecursiveArrayIterator($array),
			RecursiveIteratorIterator::LEAVES_ONLY
		);
		$data = array();

		foreach($list as $mail)
		{
			$data[] = ($bool === false)
				? $this->parseMail($mail)
				: $this->_parts($mail, false, true);
		}

		// drop duplicates, then failures (false or empty string), then reindex
		return array_values(array_filter(array_unique($data)));
	}


	/**
	*	Parse mail, can return email or boolean.
	*
	*	With $bool true the result is always a boolean: true only for a
	*	valid GMail address. With $bool false the result is the basic
	*	GMail address, false for a GMail domain with an invalid local part
	*	or, for other domains, the trimmed and lowercased address
	*	($strict false) or an empty string ($strict true).
	*
	*	@param string $str
	*	@param boolean $bool
	*	@param boolean $strict
	*	@return mixed
	*/
	private function _parts($str, $bool = false, $strict = false)
	{
		// array leaves can be anything: only strings can be addresses
		if( ! is_string($str))
			return $bool === true ? false : '';

		// sscanf() does not return an array when nothing could be read
		$data = sscanf($str, '%[^@]@%s');
		list($local_part, $domain_part) = is_array($data) ? $data : array(null, null);

		$local_part  = (string) $local_part;
		$domain_part = (string) $domain_part;
		$compose     = array();

		// domains are case insensitive: GMail.com is still GMail
		if(in_array(mb_strtolower($domain_part), $this->domains, true))
		{
			// dots and plus addressing text do not identify the account
			$local_part = str_replace('.', '', $local_part);
			$plus       = strpos($local_part, '+');

			if($plus !== false)
				$local_part = substr($local_part, 0, $plus);

			$pattern = '/^([a-zA-Z0-9.]{6,30}+)$/';

			if(preg_match($pattern, $local_part) !== 1)
				return false;

			if($bool === true)
				return true;

			// always the default domain, so googlemail.com becomes gmail.com
			$compose = array($local_part, '@', $this->default);
		}

		elseif($bool === true)
			return false;

		elseif($strict === false)
			$compose = array($local_part, '@', $domain_part);

		$compose = array_map('trim', $compose);
		$compose = array_map('mb_strtolower', $compose);

		return implode('', $compose);
	}


}
1
Contributor
1
Reply
36
Views
2 Years
Discussion Span
Last Post by cereal
1

Whoops! There is a little bug in the previous code that prevents correct results when pushing googlemail.com accounts through the parseMail() method. It is fixed by updating the _parts() method:

/**
*   Parse mail, can return email or boolean.
*
*   With $bool true the result is always a boolean: true only for a
*   valid GMail address. With $bool false the result is the basic
*   GMail address, false for a GMail domain with an invalid local part
*   or, for other domains, the trimmed and lowercased address
*   ($strict false) or an empty string ($strict true).
*
*   @param string $str
*   @param boolean $bool
*   @param boolean $strict
*   @return mixed
*/
private function _parts($str, $bool = false, $strict = false)
{
    // array leaves can be anything: only strings can be addresses
    if( ! is_string($str))
        return $bool === true ? false : '';

    // sscanf() does not return an array when nothing could be read
    $data = sscanf($str, '%[^@]@%s');
    list($local_part, $domain_part) = is_array($data) ? $data : array(null, null);

    $local_part  = (string) $local_part;
    $domain_part = (string) $domain_part;
    $compose     = array();

    // domains are case insensitive: GMail.com is still GMail
    if(in_array(mb_strtolower($domain_part), $this->domains, true))
    {
        // dots and plus addressing text do not identify the account
        $local_part = str_replace('.', '', $local_part);
        $plus       = strpos($local_part, '+');

        if($plus !== false)
            $local_part = substr($local_part, 0, $plus);

        $pattern = '/^([a-zA-Z0-9.]{6,30}+)$/';

        if(preg_match($pattern, $local_part) !== 1)
            return false;

        if($bool === true)
            return true;

        // always the default domain, so googlemail.com becomes gmail.com;
        // this result is never overwritten by the non-GMail branch below
        $compose = array($local_part, '@', $this->default);
    }

    elseif($bool === true)
        return false;

    elseif($strict === false)
        $compose = array($local_part, '@', $domain_part);

    $compose = array_map('trim', $compose);
    $compose = array_map('mb_strtolower', $compose);

    return implode('', $compose);
}

Previous test:

Array
(
    [0] => user.name@anemail.com
    [1] => username@gmail.com
    [2] => another@gmail.com
    [3] => test@ymail.com
    [4] => this032@googlemail.com    <-- error
    [5] => "valid@test"@email.com
    [6] => awesome@yahoo.com
    [7] => someone@gmail.com
    [8] => anotheruser@gmail.com
    [9] => simple@gmail.com
)

Now:

Array
(
    [0] => user.name@anemail.com
    [1] => username@gmail.com
    [2] => another@gmail.com
    [3] => test@ymail.com
    [4] => this032@gmail.com         <-- correct
    [5] => "valid@test"@email.com
    [6] => awesome@yahoo.com
    [7] => someone@gmail.com
    [8] => anotheruser@gmail.com
    [9] => simple@gmail.com
)
Votes + Comments
Excellent!
Have something to contribute to this discussion? Please be thoughtful, detailed and courteous, and be sure to adhere to our posting rules.