angular.js/src/ngSanitize/sanitize.js
Michał Gołębiowski bf1972dc1e fix(ngSanitize): prefer textContent to innerText to avoid layout trashing
innerText depends on styling as it doesn't display hidden elements.
Therefore, it's better to use textContent not to cause unnecessary
reflows. However, IE<9 don't support textContent so the innerText
fallback is necessary.
2013-12-03 14:45:30 -08:00

466 lines
15 KiB
JavaScript

'use strict';
var $sanitizeMinErr = angular.$$minErr('$sanitize');
/**
* @ngdoc overview
* @name ngSanitize
* @description
*
* # ngSanitize
*
* The `ngSanitize` module provides functionality to sanitize HTML.
*
* {@installModule sanitize}
*
* <div doc-module-components="ngSanitize"></div>
*
* See {@link ngSanitize.$sanitize `$sanitize`} for usage.
*/
/*
* HTML Parser By Misko Hevery (misko@hevery.com)
* based on: HTML Parser By John Resig (ejohn.org)
* Original code by Erik Arvidsson, Mozilla Public License
* http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
*
* // Use like so:
* htmlParser(htmlString, {
* start: function(tag, attrs, unary) {},
* end: function(tag) {},
* chars: function(text) {},
* comment: function(text) {}
* });
*
*/
/**
* @ngdoc service
* @name ngSanitize.$sanitize
* @function
*
* @description
* The input is sanitized by parsing the html into tokens. All safe tokens (from a whitelist) are
* then serialized back to properly escaped html string. This means that no unsafe input can make
* it into the returned string, however, since our parser is more strict than a typical browser
* parser, it's possible that some obscure input, which would be recognized as valid HTML by a
* browser, won't make it through the sanitizer.
* The whitelist is configured using the functions `aHrefSanitizationWhitelist` and
* `imgSrcSanitizationWhitelist` of {@link ng.$compileProvider `$compileProvider`}.
*
* @param {string} html Html input.
* @returns {string} Sanitized html.
*
* @example
<doc:example module="ngSanitize">
<doc:source>
<script>
function Ctrl($scope, $sce) {
$scope.snippet =
'<p style="color:blue">an html\n' +
'<em onmouseover="this.textContent=\'PWN3D!\'">click here</em>\n' +
'snippet</p>';
$scope.deliberatelyTrustDangerousSnippet = function() {
return $sce.trustAsHtml($scope.snippet);
};
}
</script>
<div ng-controller="Ctrl">
Snippet: <textarea ng-model="snippet" cols="60" rows="3"></textarea>
<table>
<tr>
<td>Directive</td>
<td>How</td>
<td>Source</td>
<td>Rendered</td>
</tr>
<tr id="bind-html-with-sanitize">
<td>ng-bind-html</td>
<td>Automatically uses $sanitize</td>
<td><pre>&lt;div ng-bind-html="snippet"&gt;<br/>&lt;/div&gt;</pre></td>
<td><div ng-bind-html="snippet"></div></td>
</tr>
<tr id="bind-html-with-trust">
<td>ng-bind-html</td>
<td>Bypass $sanitize by explicitly trusting the dangerous value</td>
<td>
<pre>&lt;div ng-bind-html="deliberatelyTrustDangerousSnippet()"&gt;
&lt;/div&gt;</pre>
</td>
<td><div ng-bind-html="deliberatelyTrustDangerousSnippet()"></div></td>
</tr>
<tr id="bind-default">
<td>ng-bind</td>
<td>Automatically escapes</td>
<td><pre>&lt;div ng-bind="snippet"&gt;<br/>&lt;/div&gt;</pre></td>
<td><div ng-bind="snippet"></div></td>
</tr>
</table>
</div>
</doc:source>
<doc:scenario>
it('should sanitize the html snippet by default', function() {
expect(using('#bind-html-with-sanitize').element('div').html()).
toBe('<p>an html\n<em>click here</em>\nsnippet</p>');
});
it('should inline raw snippet if bound to a trusted value', function() {
expect(using('#bind-html-with-trust').element("div").html()).
toBe("<p style=\"color:blue\">an html\n" +
"<em onmouseover=\"this.textContent='PWN3D!'\">click here</em>\n" +
"snippet</p>");
});
it('should escape snippet without any filter', function() {
expect(using('#bind-default').element('div').html()).
toBe("&lt;p style=\"color:blue\"&gt;an html\n" +
"&lt;em onmouseover=\"this.textContent='PWN3D!'\"&gt;click here&lt;/em&gt;\n" +
"snippet&lt;/p&gt;");
});
it('should update', function() {
input('snippet').enter('new <b onclick="alert(1)">text</b>');
expect(using('#bind-html-with-sanitize').element('div').html()).toBe('new <b>text</b>');
expect(using('#bind-html-with-trust').element('div').html()).toBe(
'new <b onclick="alert(1)">text</b>');
expect(using('#bind-default').element('div').html()).toBe(
"new &lt;b onclick=\"alert(1)\"&gt;text&lt;/b&gt;");
});
</doc:scenario>
</doc:example>
*/
function $SanitizeProvider() {
this.$get = ['$$sanitizeUri', function($$sanitizeUri) {
return function(html) {
var buf = [];
htmlParser(html, htmlSanitizeWriter(buf, function(uri, isImage) {
return !/^unsafe/.test($$sanitizeUri(uri, isImage));
}));
return buf.join('');
};
}];
}
function sanitizeText(chars) {
var buf = [];
var writer = htmlSanitizeWriter(buf, angular.noop);
writer.chars(chars);
return buf.join('');
}
// Regular Expressions for parsing tags and attributes
var START_TAG_REGEXP =
/^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
BEGIN_TAG_REGEXP = /^</,
BEGING_END_TAGE_REGEXP = /^<\s*\//,
COMMENT_REGEXP = /<!--(.*?)-->/g,
DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
// Match everything outside of normal chars and " (quote character)
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
// Good source of info about elements and attributes
// http://dev.w3.org/html5/spec/Overview.html#semantics
// http://simon.html5.org/html-elements
// Safe Void Elements - HTML5
// http://dev.w3.org/html5/spec/Overview.html#void-elements
var voidElements = makeMap("area,br,col,hr,img,wbr");
// Elements that you can, intentionally, leave open (and which close themselves)
// http://dev.w3.org/html5/spec/Overview.html#optional-tags
var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
optionalEndTagInlineElements = makeMap("rp,rt"),
optionalEndTagElements = angular.extend({},
optionalEndTagInlineElements,
optionalEndTagBlockElements);
// Safe Block Elements - HTML5
var blockElements = angular.extend({}, optionalEndTagBlockElements, makeMap("address,article," +
"aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5," +
"h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul"));
// Inline Elements - HTML5
var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b," +
"bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s," +
"samp,small,span,strike,strong,sub,sup,time,tt,u,var"));
// Special Elements (can contain anything)
var specialElements = makeMap("script,style");
var validElements = angular.extend({},
voidElements,
blockElements,
inlineElements,
optionalEndTagElements);
//Attributes that have href and hence need to be sanitized
var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap");
var validAttrs = angular.extend({}, uriAttrs, makeMap(
'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+
'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,'+
'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,'+
'scope,scrolling,shape,span,start,summary,target,title,type,'+
'valign,value,vspace,width'));
function makeMap(str) {
var obj = {}, items = str.split(','), i;
for (i = 0; i < items.length; i++) obj[items[i]] = true;
return obj;
}
/**
* @example
* htmlParser(htmlString, {
* start: function(tag, attrs, unary) {},
* end: function(tag) {},
* chars: function(text) {},
* comment: function(text) {}
* });
*
* @param {string} html string
* @param {object} handler
*/
function htmlParser( html, handler ) {
var index, chars, match, stack = [], last = html;
stack.last = function() { return stack[ stack.length - 1 ]; };
while ( html ) {
chars = true;
// Make sure we're not in a script or style element
if ( !stack.last() || !specialElements[ stack.last() ] ) {
// Comment
if ( html.indexOf("<!--") === 0 ) {
// comments containing -- are not allowed unless they terminate the comment
index = html.indexOf("--", 4);
if ( index >= 0 && html.lastIndexOf("-->", index) === index) {
if (handler.comment) handler.comment( html.substring( 4, index ) );
html = html.substring( index + 3 );
chars = false;
}
// DOCTYPE
} else if ( DOCTYPE_REGEXP.test(html) ) {
match = html.match( DOCTYPE_REGEXP );
if ( match ) {
html = html.replace( match[0] , '');
chars = false;
}
// end tag
} else if ( BEGING_END_TAGE_REGEXP.test(html) ) {
match = html.match( END_TAG_REGEXP );
if ( match ) {
html = html.substring( match[0].length );
match[0].replace( END_TAG_REGEXP, parseEndTag );
chars = false;
}
// start tag
} else if ( BEGIN_TAG_REGEXP.test(html) ) {
match = html.match( START_TAG_REGEXP );
if ( match ) {
html = html.substring( match[0].length );
match[0].replace( START_TAG_REGEXP, parseStartTag );
chars = false;
}
}
if ( chars ) {
index = html.indexOf("<");
var text = index < 0 ? html : html.substring( 0, index );
html = index < 0 ? "" : html.substring( index );
if (handler.chars) handler.chars( decodeEntities(text) );
}
} else {
html = html.replace(new RegExp("(.*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'),
function(all, text){
text = text.replace(COMMENT_REGEXP, "$1").replace(CDATA_REGEXP, "$1");
if (handler.chars) handler.chars( decodeEntities(text) );
return "";
});
parseEndTag( "", stack.last() );
}
if ( html == last ) {
throw $sanitizeMinErr('badparse', "The sanitizer was unable to parse the following block " +
"of html: {0}", html);
}
last = html;
}
// Clean up any remaining tags
parseEndTag();
function parseStartTag( tag, tagName, rest, unary ) {
tagName = angular.lowercase(tagName);
if ( blockElements[ tagName ] ) {
while ( stack.last() && inlineElements[ stack.last() ] ) {
parseEndTag( "", stack.last() );
}
}
if ( optionalEndTagElements[ tagName ] && stack.last() == tagName ) {
parseEndTag( "", tagName );
}
unary = voidElements[ tagName ] || !!unary;
if ( !unary )
stack.push( tagName );
var attrs = {};
rest.replace(ATTR_REGEXP,
function(match, name, doubleQuotedValue, singleQuotedValue, unquotedValue) {
var value = doubleQuotedValue
|| singleQuotedValue
|| unquotedValue
|| '';
attrs[name] = decodeEntities(value);
});
if (handler.start) handler.start( tagName, attrs, unary );
}
function parseEndTag( tag, tagName ) {
var pos = 0, i;
tagName = angular.lowercase(tagName);
if ( tagName )
// Find the closest opened tag of the same type
for ( pos = stack.length - 1; pos >= 0; pos-- )
if ( stack[ pos ] == tagName )
break;
if ( pos >= 0 ) {
// Close all the open elements, up the stack
for ( i = stack.length - 1; i >= pos; i-- )
if (handler.end) handler.end( stack[ i ] );
// Remove the open elements from the stack
stack.length = pos;
}
}
}
var hiddenPre=document.createElement("pre");
var spaceRe = /^(\s*)([\s\S]*?)(\s*)$/;
/**
* decodes all entities into regular string
* @param value
* @returns {string} A string with decoded entities.
*/
function decodeEntities(value) {
if (!value) { return ''; }
// Note: IE8 does not preserve spaces at the start/end of innerHTML
// so we must capture them and reattach them afterward
var parts = spaceRe.exec(value);
var spaceBefore = parts[1];
var spaceAfter = parts[3];
var content = parts[2];
if (content) {
hiddenPre.innerHTML=content.replace(/</g,"&lt;");
// innerText depends on styling as it doesn't display hidden elements.
// Therefore, it's better to use textContent not to cause unnecessary
// reflows. However, IE<9 don't support textContent so the innerText
// fallback is necessary.
content = 'textContent' in hiddenPre ?
hiddenPre.textContent : hiddenPre.innerText;
}
return spaceBefore + content + spaceAfter;
}
/**
* Escapes all potentially dangerous characters, so that the
* resulting string can be safely inserted into attribute or
* element text.
* @param value
* @returns escaped text
*/
function encodeEntities(value) {
return value.
replace(/&/g, '&amp;').
replace(NON_ALPHANUMERIC_REGEXP, function(value){
return '&#' + value.charCodeAt(0) + ';';
}).
replace(/</g, '&lt;').
replace(/>/g, '&gt;');
}
/**
* create an HTML/XML writer which writes to buffer
* @param {Array} buf use buf.jain('') to get out sanitized html string
* @returns {object} in the form of {
* start: function(tag, attrs, unary) {},
* end: function(tag) {},
* chars: function(text) {},
* comment: function(text) {}
* }
*/
function htmlSanitizeWriter(buf, uriValidator){
var ignore = false;
var out = angular.bind(buf, buf.push);
return {
start: function(tag, attrs, unary){
tag = angular.lowercase(tag);
if (!ignore && specialElements[tag]) {
ignore = tag;
}
if (!ignore && validElements[tag] === true) {
out('<');
out(tag);
angular.forEach(attrs, function(value, key){
var lkey=angular.lowercase(key);
var isImage = (tag === 'img' && lkey === 'src') || (lkey === 'background');
if (validAttrs[lkey] === true &&
(uriAttrs[lkey] !== true || uriValidator(value, isImage))) {
out(' ');
out(key);
out('="');
out(encodeEntities(value));
out('"');
}
});
out(unary ? '/>' : '>');
}
},
end: function(tag){
tag = angular.lowercase(tag);
if (!ignore && validElements[tag] === true) {
out('</');
out(tag);
out('>');
}
if (tag == ignore) {
ignore = false;
}
},
chars: function(chars){
if (!ignore) {
out(encodeEntities(chars));
}
}
};
}
// define ngSanitize module and register $sanitize service
angular.module('ngSanitize', []).provider('$sanitize', $SanitizeProvider);