Added sanitizer extension

2014-05-28 00:33:27 +01:00 · 2014-05-28 00:33:27 +01:00 · 3ccc267028
commit 3ccc267028
parent 81b37f3f1e
3 changed files with 400 additions and 2 deletions
--- a/public/res/eventMgr.js
+++ b/public/res/eventMgr.js
@ -24,8 +24,8 @@ define([
    "extensions/documentManager",
    "extensions/workingIndicator",
    "extensions/notifications",
    "extensions/markdownExtra",
 	"extensions/umlDiagrams",
    "extensions/markdownExtra",
    "extensions/toc",
    "extensions/mathJax",
    "extensions/emailConverter",
@ -39,8 +39,9 @@ define([
    "extensions/shortcuts",
    "extensions/userCustom",
    "extensions/comments",
    "extensions/htmlSanitizer",
    "bootstrap",
-    "jquery-waitforimages",
+    "jquery-waitforimages"
 ], function($, _, crel, utils, logger, Extension, settings, settingsExtensionsAccordionHTML) {
    var eventMgr = {};
--- a/public/res/extensions/htmlSanitizer.js
+++ b/public/res/extensions/htmlSanitizer.js
@ -0,0 +1,395 @@
 define([
 	"jquery",
 	"underscore",
 	"utils",
 	"logger",
 	"classes/Extension",
 	"text!html/htmlSanitizerSettingsBlock.html"
 ], function($, _, utils, logger, Extension, htmlSanitizerSettingsBlockHTML) {
 	var htmlSanitizer = new Extension("htmlSanitizer", "HTML Sanitizer", true);
 	htmlSanitizer.settingsBlock = htmlSanitizerSettingsBlockHTML;
 	var buf;
 	htmlSanitizer.onPagedownConfigure = function(editor) {
 		var converter = editor.getConverter();
 		converter.hooks.chain("postConversion", function(html) {
 			buf = [];
 			html.split('<div class="se-preview-section-delimiter"></div>').forEach(function(sectionHtml) {
 				try {
 					htmlParser(sectionHtml, htmlSanitizeWriter(buf, function(uri, isImage) {
 						return !/^unsafe/.test(sanitizeUri(uri, isImage));
 					}));
 				}
 				catch(e) {
 				}
 				buf.push('<div class="se-preview-section-delimiter"></div>');
 			});
 			return buf.slice(0, -1).join('');
 		});
 	};
 	/**
 	 * @license AngularJS v1.2.16
 	 * (c) 2010-2014 Google, Inc. http://angularjs.org
 	 * License: MIT
 	 */
 	var aHrefSanitizationWhitelist = /^\s*(https?|ftp|mailto|tel|file):/,
 		imgSrcSanitizationWhitelist = /^\s*(https?|ftp|file):|data:image\//;
 	function sanitizeUri(uri, isImage) {
 		var regex = isImage ? imgSrcSanitizationWhitelist : aHrefSanitizationWhitelist;
 		var normalizedVal;
 		normalizedVal = utils.urlResolve(uri).href;
 		if(normalizedVal !== '' && !normalizedVal.match(regex)) {
 			return 'unsafe:' + normalizedVal;
 		}
 	}
 	// Regular Expressions for parsing tags and attributes
 	var START_TAG_REGEXP =
 			/^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
 		END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
 		ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
 		BEGIN_TAG_REGEXP = /^</,
 		BEGING_END_TAGE_REGEXP = /^<\s*\//,
 		COMMENT_REGEXP = /<!--(.*?)-->/g,
 		DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
 		CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
 		// Match everything outside of normal chars and " (quote character)
 		NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
 	function makeMap(str) {
 		var obj = {}, items = str.split(','), i;
 		for(i = 0; i < items.length; i++) {
 			obj[items[i]] = true;
 		}
 		return obj;
 	}
 	// Good source of info about elements and attributes
 	// http://dev.w3.org/html5/spec/Overview.html#semantics
 	// http://simon.html5.org/html-elements
 	// Safe Void Elements - HTML5
 	// http://dev.w3.org/html5/spec/Overview.html#void-elements
 	var voidElements = makeMap("area,br,col,hr,img,wbr");
 	// Elements that you can, intentionally, leave open (and which close themselves)
 	// http://dev.w3.org/html5/spec/Overview.html#optional-tags
 	var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
 		optionalEndTagInlineElements = makeMap("rp,rt"),
 		optionalEndTagElements = _.extend({},
 			optionalEndTagInlineElements,
 			optionalEndTagBlockElements);
 	// Safe Block Elements - HTML5
 	var blockElements = _.extend({}, optionalEndTagBlockElements, makeMap("address,article," +
 		"aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5," +
 		"h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul"));
 	// Inline Elements - HTML5
 	var inlineElements = _.extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b," +
 		"bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s," +
 		"samp,small,span,strike,strong,sub,sup,time,tt,u,var"));
 	// Special Elements (can contain anything)
 	var specialElements = makeMap("script,style");
 	var validElements = _.extend({},
 		voidElements,
 		blockElements,
 		inlineElements,
 		optionalEndTagElements);
 	//Attributes that have href and hence need to be sanitized
 	var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap");
 	var validAttrs = _.extend({}, uriAttrs, makeMap(
 			'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' +
 			'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,' +
 			'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,' +
 			'scope,scrolling,shape,size,span,start,summary,target,title,type,' +
 			'valign,value,vspace,width'));
 	// benweet: Add id
 	validAttrs.id = true;
 	/*
 	 * HTML Parser By Misko Hevery (misko@hevery.com)
 	 * based on:  HTML Parser By John Resig (ejohn.org)
 	 * Original code by Erik Arvidsson, Mozilla Public License
 	 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 	 *
 	 * // Use like so:
 	 * htmlParser(htmlString, {
 	 *     start: function(tag, attrs, unary) {},
 	 *     end: function(tag) {},
 	 *     chars: function(text) {},
 	 *     comment: function(text) {}
 	 * });
 	 *
 	 */
 	/* jshint -W083 */
 	function htmlParser(html, handler) {
 		var index, chars, match, stack = [], last = html;
 		stack.last = function() {
 			return stack[ stack.length - 1 ];
 		};
 		function parseStartTag(tag, tagName, rest, unary) {
 			tagName = tagName && tagName.toLowerCase();
 			if(blockElements[ tagName ]) {
 				while(stack.last() && inlineElements[ stack.last() ]) {
 					parseEndTag("", stack.last());
 				}
 			}
 			if(optionalEndTagElements[ tagName ] && stack.last() == tagName) {
 				parseEndTag("", tagName);
 			}
 			unary = voidElements[ tagName ] || !!unary;
 			if(!unary) {
 				stack.push(tagName);
 			}
 			var attrs = {};
 			rest.replace(ATTR_REGEXP,
 				function(match, name, doubleQuotedValue, singleQuotedValue, unquotedValue) {
 					var value = doubleQuotedValue ||
 						singleQuotedValue ||
 						unquotedValue ||
 						'';
 					attrs[name] = decodeEntities(value);
 				});
 			if(handler.start) {
 				handler.start(tagName, attrs, unary);
 			}
 		}
 		function parseEndTag(tag, tagName) {
 			var pos = 0, i;
 			tagName = tagName && tagName.toLowerCase();
 			if(tagName) {
 				// Find the closest opened tag of the same type
 				for(pos = stack.length - 1; pos >= 0; pos--) {
 					if(stack[ pos ] == tagName) {
 						break;
 					}
 				}
 			}
 			if(pos >= 0) {
 				// Close all the open elements, up the stack
 				for(i = stack.length - 1; i >= pos; i--) {
 					if(handler.end) {
 						handler.end(stack[ i ]);
 					}
 				}
 				// Remove the open elements from the stack
 				stack.length = pos;
 			}
 		}
 		while(html) {
 			chars = true;
 			// Make sure we're not in a script or style element
 			if(!stack.last() || !specialElements[ stack.last() ]) {
 				// Comment
 				if(html.indexOf("<!--") === 0) {
 					// comments containing -- are not allowed unless they terminate the comment
 					index = html.indexOf("--", 4);
 					if(index >= 0 && html.lastIndexOf("-->", index) === index) {
 						if(handler.comment) {
 							handler.comment(html.substring(4, index));
 						}
 						html = html.substring(index + 3);
 						chars = false;
 					}
 					// DOCTYPE
 				} else if(DOCTYPE_REGEXP.test(html)) {
 					match = html.match(DOCTYPE_REGEXP);
 					if(match) {
 						html = html.replace(match[0], '');
 						chars = false;
 					}
 					// end tag
 				} else if(BEGING_END_TAGE_REGEXP.test(html)) {
 					match = html.match(END_TAG_REGEXP);
 					if(match) {
 						html = html.substring(match[0].length);
 						match[0].replace(END_TAG_REGEXP, parseEndTag);
 						chars = false;
 					}
 					// start tag
 				} else if(BEGIN_TAG_REGEXP.test(html)) {
 					match = html.match(START_TAG_REGEXP);
 					if(match) {
 						html = html.substring(match[0].length);
 						match[0].replace(START_TAG_REGEXP, parseStartTag);
 						chars = false;
 					}
 				}
 				if(chars) {
 					index = html.indexOf("<");
 					var text = index < 0 ? html : html.substring(0, index);
 					html = index < 0 ? "" : html.substring(index);
 					if(handler.chars) {
 						handler.chars(decodeEntities(text));
 					}
 				}
 			} else {
 				html = html.replace(new RegExp("(.*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'),
 					function(all, text) {
 						text = text.replace(COMMENT_REGEXP, "$1").replace(CDATA_REGEXP, "$1");
 						if(handler.chars) {
 							handler.chars(decodeEntities(text));
 						}
 						return "";
 					});
 				parseEndTag("", stack.last());
 			}
 			if(html == last) {
 				//throw new Error("The sanitizer was unable to parse the following block of html: " + html);
 				stack.reverse();
 				return stack.forEach(function(tag) {
 					buf.push('</');
 					buf.push(tag);
 					buf.push('>');
 				});
 			}
 			last = html;
 		}
 		// Clean up any remaining tags
 		parseEndTag();
 	}
 	var hiddenPre = document.createElement("pre");
 	var spaceRe = /^(\s*)([\s\S]*?)(\s*)$/;
 	/**
 	 * decodes all entities into regular string
 	 * @param value
 	 * @returns {string} A string with decoded entities.
 	 */
 	function decodeEntities(value) {
 		if(!value) {
 			return '';
 		}
 		// Note: IE8 does not preserve spaces at the start/end of innerHTML
 		// so we must capture them and reattach them afterward
 		var parts = spaceRe.exec(value);
 		var spaceBefore = parts[1];
 		var spaceAfter = parts[3];
 		var content = parts[2];
 		if(content) {
 			hiddenPre.innerHTML = content.replace(/</g, "&lt;");
 			// innerText depends on styling as it doesn't display hidden elements.
 			// Therefore, it's better to use textContent not to cause unnecessary
 			// reflows. However, IE<9 don't support textContent so the innerText
 			// fallback is necessary.
 			content = 'textContent' in hiddenPre ?
 				hiddenPre.textContent : hiddenPre.innerText;
 		}
 		return spaceBefore + content + spaceAfter;
 	}
 	/**
 	 * Escapes all potentially dangerous characters, so that the
 	 * resulting string can be safely inserted into attribute or
 	 * element text.
 	 * @param value
 	 * @returns {string} escaped text
 	 */
 	function encodeEntities(value) {
 		return value.
 			replace(/&/g, '&amp;').
 			replace(NON_ALPHANUMERIC_REGEXP, function(value) {
 				return '&#' + value.charCodeAt(0) + ';';
 			}).
 			replace(/</g, '&lt;').
 			replace(/>/g, '&gt;');
 	}
 	/**
 	 * create an HTML/XML writer which writes to buffer
 	 * @param {Array} buf use buf.jain('') to get out sanitized html string
 	 * @returns {object} in the form of {
 	 *     start: function(tag, attrs, unary) {},
 	 *     end: function(tag) {},
 	 *     chars: function(text) {},
 	 *     comment: function(text) {}
 	 * }
 	 */
 	function htmlSanitizeWriter(buf, uriValidator) {
 		var ignore = false;
 		var out = _.bind(buf.push, buf);
 		return {
 			start: function(tag, attrs, unary) {
 				tag = tag && tag.toLowerCase();
 				if(!ignore && specialElements[tag]) {
 					ignore = tag;
 				}
 				if(!ignore && validElements[tag] === true) {
 					out('<');
 					out(tag);
 					_.forEach(attrs, function(value, key) {
 						var lkey = key && key.toLowerCase();
 						var isImage = (tag === 'img' && lkey === 'src') || (lkey === 'background');
 						if(validAttrs[lkey] === true &&
 							(uriAttrs[lkey] !== true || uriValidator(value, isImage))) {
 							out(' ');
 							out(key);
 							out('="');
 							out(encodeEntities(value));
 							out('"');
 						}
 					});
 					out(unary ? '/>' : '>');
 				}
 			},
 			end: function(tag) {
 				tag = tag && tag.toLowerCase();
 				if(!ignore && validElements[tag] === true) {
 					out('</');
 					out(tag);
 					out('>');
 				}
 				if(tag == ignore) {
 					ignore = false;
 				}
 			},
 			chars: function(chars) {
 				if(!ignore) {
 					out(encodeEntities(chars));
 				}
 			}
 		};
 	}
 	return htmlSanitizer;
 });
--- a/public/res/html/htmlSanitizerSettingsBlock.html
+++ b/public/res/html/htmlSanitizerSettingsBlock.html
@ -0,0 +1,2 @@
 <p>Prevents cross-site-scripting attacks (XSS).</p>
 <p class="alert alert-danger"><i class="icon-attention"></i> <b>Careful:</b> Disable at your own risk!</p>
		`@ -0,0 +1,2 @@`
							`<p>Prevents cross-site-scripting attacks (XSS).</p>`
							`<p class="alert alert-danger"><i class="icon-attention"></i> <b>Careful:</b> Disable at your own risk!</p>`