fix($sanitize): sanitize DOCTYPE declarations correctly

HTML to be sanitized that contained a DOCTYPE declaration was causing
the HTML parser to throw an error. Now the parser correctly removes
DOCTYPE declarations when sanitizing HTML.

Closes #3931
This commit is contained in:
paolo-delmundo 2013-10-02 20:49:20 +01:00 committed by Pete Bacon Darwin
parent e36e28ebd4
commit e66c23fe55
2 changed files with 17 additions and 1 deletions

View file

@ -135,6 +135,7 @@ var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:
BEGIN_TAG_REGEXP = /^</,
BEGING_END_TAGE_REGEXP = /^<\s*\//,
COMMENT_REGEXP = /<!--(.*?)-->/g,
DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
URI_REGEXP = /^((ftp|https?):\/\/|mailto:|tel:|#)/i,
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)
@ -218,7 +219,14 @@ function htmlParser( html, handler ) {
html = html.substring( index + 3 );
chars = false;
}
// DOCTYPE
} else if ( DOCTYPE_REGEXP.test(html) ) {
match = html.match( DOCTYPE_REGEXP );
if ( match ) {
html = html.replace( match[0] , '');
chars = false;
}
// end tag
} else if ( BEGING_END_TAGE_REGEXP.test(html) ) {
match = html.match( END_TAG_REGEXP );

View file

@ -24,7 +24,7 @@ describe('HTML', function() {
attrs: attrs,
unary: unary
};
// Since different browsers handle newlines differenttly we trim
// Since different browsers handle newlines differently we trim
// so that it is easier to write tests.
angular.forEach(attrs, function(value, key) {
attrs[key] = value.replace(/^\s*/, '').replace(/\s*$/, '')
@ -112,6 +112,13 @@ describe('HTML', function() {
expectHTML('a<SCRIPT>evil< / scrIpt >c.').toEqual('ac.');
});
it('should remove DOCTYPE header', function() {
  // Each entry is [input HTML, expected sanitized output]. The cases cover a
  // bare declaration, a multi-line legacy declaration, a declaration embedded
  // between text, and a mixed-case keyword.
  var doctypeCases = [
    ['<!DOCTYPE html>', ''],
    ['<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n"http://www.w3.org/TR/html4/strict.dtd">', ''],
    ['a<!DOCTYPE html>c.', 'ac.'],
    ['a<!DocTyPe html>c.', 'ac.']
  ];

  angular.forEach(doctypeCases, function(doctypeCase) {
    expectHTML(doctypeCase[0]).toEqual(doctypeCase[1]);
  });
});
it('should remove nested script', function() {
  // A script element opened inside another script element; only the text
  // outside the outermost pair ('a' and 'c.') is expected in the output.
  var nestedScriptHtml = 'a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.';
  var sanitizedText = 'ac.';

  expectHTML(nestedScriptHtml).toEqual(sanitizedText);
});
@ -320,5 +327,6 @@ describe('HTML', function() {
});
});
});
});