1.11M Members

Regex to remove 'Illegal Characters'

 
0
 

Hi all

I've been trying to find a regular expression that checks if input contains any of the following characters only:

`~!@#$%^&*()-=+\|/?.>,<;:'"[{]}

I want to allow users to input any normal character a-z or any numbers as well as underscores and any special character that resembles a letter such as é, ê, ô or ÿ etc

So far I have the following which doesn't allow for any of the special characters that I want to allow users to use:

/[^\w\s]/g

The code I'm using is as follows:

/**
 * Validate a user-supplied name.
 *
 * The checks run in order and the first failure wins: empty input, then
 * minimum length, then forbidden characters.
 *
 * @param {string} strng - the name to validate
 * @returns {string} an error message terminated by a newline, or "" when
 *     every check passes
 */
function checkName (strng) {
    // Matches anything that is neither a word character (\w: ASCII letters,
    // digits, underscore) nor whitespace. Because \w is ASCII-only in
    // JavaScript, accented letters such as é are also reported as illegal.
    var forbidden = /[^\w\s]/g;

    if (strng == "") {
        return "Please enter your name.\n";
    }
    if (strng.length < 2) {
        return "The name is the wrong length.\n";
    }
    return forbidden.test(strng) ? "The name contains illegal characters.\n" : "";
}
 
0
 

Try the following character class:

var illegalChars = /[\u0021-\u002f\u003a-\u0040\u005b-\u005e\u0060\u007b-\u007e]/g; // Don't allow any of these

JavaScript supports specifying Unicode characters with hexadecimal escapes like \u0060 and ranges like \u007b-\u007e.

There is a fun website at http://hamstersoup.com/javascript/regexp_character_class_tester.html that gives you the unicode expressions for any character class you specify.

 
0
 

Try the following character class:

var illegalChars = /[\u0021-\u002f\u003a-\u0040\u005b-\u005e\u0060\u007b-\u007e]/g; // Don't allow any of these

JavaScript supports specifying Unicode characters with hexadecimal escapes like \u0060 and ranges like \u007b-\u007e.

There is a fun website at http://hamstersoup.com/javascript/regexp_character_class_tester.html that gives you the unicode expressions for any character class you specify.

Thanks for that but there seems to be one problem...the 'illegal' characters are only detected if they are the first or last letter. If it's anywhere between valid characters it goes undetected. Is there any way I could change the code to fix this?

 
0
 

...the 'illegal' characters are only detected if they are the first or last letter. If it's anywhere between valid characters it goes undetected.

That's strange, I can't duplicate the problem. Here is the test script I'm using. Can you give me an example to put in the teststring variable that results in undetected illegal characters?

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
    "http://www.w3.org/TR/html4/strict.dtd"
    >
<html lang="en">
<head>
    <title>Test regex punctuation filter</title>
    
<script type="text/javascript">
/**
 * Validate a name against a blacklist of ASCII punctuation.
 *
 * @param {string} strng - the name to validate
 * @returns {string} an error message terminated by a newline, or "" when
 *     the name is non-empty, at least two characters long and free of
 *     blacklisted characters
 */
function checkName (strng) {
    // Blacklisted ranges: U+0021-U+002F, U+003A-U+0040, U+005B-U+005E,
    // U+0060 and U+007B-U+007E. The underscore (U+005F) is deliberately
    // outside every range, so it stays legal.
    var punctuation = /[\u0021-\u002f\u003a-\u0040\u005b-\u005e\u0060\u007b-\u007e]/g;

    if (strng == "") {
        return "Please enter your name.\n";
    }
    if (strng.length < 2) {
        return "The name is the wrong length.\n";
    }
    if (!punctuation.test(strng)) {
        return "";
    }
    return "The name contains illegal characters.\n";
}

// Smoke test: run checkName on a sample that contains illegal characters
// (':' and '@') and write the outcome into the page.
var teststring = "Here is a string with ill:egal ch@racters";
var errmsg = checkName(teststring);
// Alternative for debugging: show the same result in an alert box instead.
//alert("Testing \'" + teststring + "\' results in \'" + errmsg + "\'");
document.writeln(["Testing <p><b>", teststring, "</b><p> results in <p><b>", errmsg].join(""));
</script>

</head>
<body>

</body>
</html>
 
0
 

I simplified my test script. It still seems to find illegal characters wherever they are in the test string.

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
    "http://www.w3.org/TR/html4/strict.dtd"
    >
<html lang="en">
<head>
    <title>Test regex punctuation filter</title>
    
<script type="text/javascript">
/**
 * Report whether a string contains any blacklisted ASCII punctuation.
 *
 * @param {string} strng - the text to inspect
 * @returns {boolean} true when at least one blacklisted character is found
 */
function checkString (strng) {
    // Blacklisted ranges: U+0021-U+002F, U+003A-U+0040, U+005B-U+005E,
    // U+0060 and U+007B-U+007E (the underscore, U+005F, is not included).
    return /[\u0021-\u002f\u003a-\u0040\u005b-\u005e\u0060\u007b-\u007e]/g.test(strng);
}

// Case 1: the sample contains an illegal character ('&').
var teststring = "Here is a str&ing with illegal characters";
var errmsg = checkString(teststring);
document.writeln(["Testing \"<b>", teststring, "</b>\" results in <b>", errmsg, "</b><p>"].join(""));

// Case 2: the sample contains no illegal characters.
teststring = "Here is a string without any illegal characters";
errmsg = checkString(teststring);
document.writeln(["Testing \"<b>", teststring, "</b>\" results in <b>", errmsg, "</b><p>"].join(""));
</script>

</head>
<body>
</body>
</html>
 
0
 

I simplified my test script. It still seems to find illegal characters wherever they are in the test string.

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
    "http://www.w3.org/TR/html4/strict.dtd"
    >
<html lang="en">
<head>
    <title>Test regex punctuation filter</title>
    
<script type="text/javascript">
/**
 * Report whether a string contains any blacklisted ASCII punctuation.
 *
 * @param {string} strng - the text to inspect
 * @returns {boolean} true when at least one blacklisted character is found
 */
function checkString (strng) {
    // Blacklisted ranges: U+0021-U+002F, U+003A-U+0040, U+005B-U+005E,
    // U+0060 and U+007B-U+007E (the underscore, U+005F, is not included).
    return /[\u0021-\u002f\u003a-\u0040\u005b-\u005e\u0060\u007b-\u007e]/g.test(strng);
}

// Case 1: the sample contains an illegal character ('&').
var teststring = "Here is a str&ing with illegal characters";
var errmsg = checkString(teststring);
document.writeln(["Testing \"<b>", teststring, "</b>\" results in <b>", errmsg, "</b><p>"].join(""));

// Case 2: the sample contains no illegal characters.
teststring = "Here is a string without any illegal characters";
errmsg = checkString(teststring);
document.writeln(["Testing \"<b>", teststring, "</b>\" results in <b>", errmsg, "</b><p>"].join(""));
</script>

</head>
<body>
</body>
</html>

Strange, try using one word in your test with an illegal character in the middle of the word. That's what I've been testing and it only detects the illegal character at the start or end of the word

 
1
 

In that regex for illegal characters I should not have put the /g modifier at the end. As soon as we find the first illegal character, that is all we need to know. The /g (global) modifier attempts to match all illegal characters in the string, starting at the position where it previously found a match. All we need to know is whether there is at least one illegal character in the string, so we don't need /g, and it is giving us inconsistent results (a global regex remembers its lastIndex between test() calls, so when the same regex object is reused the next search can start partway through the string and miss a match).

In your script, try replacing the regex like this:

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
    "http://www.w3.org/TR/html4/strict.dtd"
    >
<html lang="en">
<head>
    <title>Test regex punctuation filter</title>
    
<script type="text/javascript">
/**
 * Report whether a string contains any blacklisted ASCII punctuation.
 *
 * @param {string} strng - the text to inspect
 * @returns {boolean} true when at least one blacklisted character is found
 */
function checkString (strng) {
    // Deliberately NOT global: an earlier version of this pattern ended in
    // /g, which was a goof. A single match anywhere is all we need.
    // Blacklisted ranges: U+0021-U+002F, U+003A-U+0040, U+005B-U+005E,
    // U+0060 and U+007B-U+007E (the underscore, U+005F, is not included).
    return /[\u0021-\u002f\u003a-\u0040\u005b-\u005e\u0060\u007b-\u007e]/.test(strng);
}

// Case 1: '$' in the middle of a single word is illegal, so the expected
// result is 'true'.
var teststring = "Java$cript";
var errmsg = checkString(teststring);
document.writeln(["Testing \"<b>", teststring, "</b>\" results in <b>", errmsg, "</b><p>"].join(""));

// Case 2: no illegal character, so the expected result is 'false'.
teststring = "Javascript";
errmsg = checkString(teststring);
document.writeln(["Testing \"<b>", teststring, "</b>\" results in <b>", errmsg, "</b><p>"].join(""));
</script>

</head>
<body>

</body>
</html>
 
0
 

Perfect. Thanks for that d5e5. Works exactly the way I want now.

Question Answered as of 4 Years Ago by d5e5
You
This question has already been solved: Start a new discussion instead
Post:
Start New Discussion
View similar articles that have also been tagged: