safehtml.js

// Copyright 2013 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


/**
 * @fileoverview The SafeHtml type and its builders.
 *
 * TODO(user): Link to document stating type contract.
 */

goog.provide('goog.html.SafeHtml');

goog.require('goog.array');
goog.require('goog.asserts');
goog.require('goog.dom.tags');
goog.require('goog.html.SafeStyle');
goog.require('goog.html.SafeUrl');
goog.require('goog.i18n.bidi.Dir');
goog.require('goog.i18n.bidi.DirectionalString');
goog.require('goog.object');
goog.require('goog.string');
goog.require('goog.string.Const');
goog.require('goog.string.TypedString');



/**
 * A string that is safe to use in HTML context in DOM APIs and HTML documents.
 *
 * A SafeHtml is a string-like object that carries the security type contract
 * that its value as a string will not cause untrusted script execution when
 * evaluated as HTML in a browser.
 *
 * Values of this type are guaranteed to be safe to use in HTML contexts,
 * such as, assignment to the innerHTML DOM property, or interpolation into
 * a HTML template in HTML PC_DATA context, in the sense that the use will not
 * result in a Cross-Site-Scripting vulnerability.
 *
 * Instances of this type must be created via the factory methods
 * ({@code goog.html.SafeHtml.create}, {@code goog.html.SafeHtml.htmlEscape}),
 * etc and not by invoking its constructor.  The constructor intentionally
 * takes no parameters and the type is immutable; hence only a default instance
 * corresponding to the empty string can be obtained via constructor invocation.
 *
 * @see goog.html.SafeHtml#create
 * @see goog.html.SafeHtml#htmlEscape
 * @constructor
 * @final
 * @struct
 * @implements {goog.i18n.bidi.DirectionalString}
 * @implements {goog.string.TypedString}
 */
goog.html.SafeHtml = function() {
  /**
   * The contained value of this SafeHtml.  The field has a purposely ugly
   * name to make (non-compiled) code that attempts to directly access this
   * field stand out.
   * @private {string}
   */
  this.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = '';

  /**
   * A type marker used to implement additional run-time type checking.
   * @see goog.html.SafeHtml#unwrap
   * @const
   * @private
   */
  this.SAFE_HTML_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ =
      goog.html.SafeHtml.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;

  /**
   * This SafeHtml's directionality, or null if unknown.
   * @private {?goog.i18n.bidi.Dir}
   */
  this.dir_ = null;
};


/**
 * @override
 * @const
 */
goog.html.SafeHtml.prototype.implementsGoogI18nBidiDirectionalString = true;


/** @override */
goog.html.SafeHtml.prototype.getDirection = function() {
  return this.dir_;
};


/**
 * @override
 * @const
 */
goog.html.SafeHtml.prototype.implementsGoogStringTypedString = true;


/**
 * Returns this SafeHtml's value a string.
 *
 * IMPORTANT: In code where it is security relevant that an object's type is
 * indeed {@code SafeHtml}, use {@code goog.html.SafeHtml.unwrap} instead of
 * this method. If in doubt, assume that it's security relevant. In particular,
 * note that goog.html functions which return a goog.html type do not guarantee
 * that the returned instance is of the right type. For example:
 *
 * <pre>
 * var fakeSafeHtml = new String('fake');
 * fakeSafeHtml.__proto__ = goog.html.SafeHtml.prototype;
 * var newSafeHtml = goog.html.SafeHtml.htmlEscape(fakeSafeHtml);
 * // newSafeHtml is just an alias for fakeSafeHtml, it's passed through by
 * // goog.html.SafeHtml.htmlEscape() as fakeSafeHtml
 * // instanceof goog.html.SafeHtml.
 * </pre>
 *
 * @see goog.html.SafeHtml#unwrap
 * @override
 */
goog.html.SafeHtml.prototype.getTypedStringValue = function() {
  return this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
};


if (goog.DEBUG) {
  /**
   * Returns a debug string-representation of this value.
   *
   * To obtain the actual string value wrapped in a SafeHtml, use
   * {@code goog.html.SafeHtml.unwrap}.
   *
   * @see goog.html.SafeHtml#unwrap
   * @override
   */
  goog.html.SafeHtml.prototype.toString = function() {
    return 'SafeHtml{' + this.privateDoNotAccessOrElseSafeHtmlWrappedValue_ +
        '}';
  };
}


/**
 * Performs a runtime check that the provided object is indeed a SafeHtml
 * object, and returns its value.
 * @param {!goog.html.SafeHtml} safeHtml The object to extract from.
 * @return {string} The SafeHtml object's contained string, unless the run-time
 *     type check fails. In that case, {@code unwrap} returns an innocuous
 *     string, or, if assertions are enabled, throws
 *     {@code goog.asserts.AssertionError}.
 */
goog.html.SafeHtml.unwrap = function(safeHtml) {
  // Perform additional run-time type-checking to ensure that safeHtml is indeed
  // an instance of the expected type.  This provides some additional protection
  // against security bugs due to application code that disables type checks.
  // Specifically, the following checks are performed:
  // 1. The object is an instance of the expected type.
  // 2. The object is not an instance of a subclass.
  // 3. The object carries a type marker for the expected type. "Faking" an
  // object requires a reference to the type marker, which has names intended
  // to stand out in code reviews.
  if (safeHtml instanceof goog.html.SafeHtml &&
      safeHtml.constructor === goog.html.SafeHtml &&
      safeHtml.SAFE_HTML_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ ===
          goog.html.SafeHtml.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_) {
    return safeHtml.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
  } else {
    goog.asserts.fail('expected object of type SafeHtml, got \'' +
                      safeHtml + '\'');
    return 'type_error:SafeHtml';
  }
};


/**
 * Shorthand for union of types that can sensibly be converted to strings
 * or might already be SafeHtml (as SafeHtml is a goog.string.TypedString).
 * @private
 * @typedef {string|number|boolean|!goog.string.TypedString|
 *           !goog.i18n.bidi.DirectionalString}
 */
goog.html.SafeHtml.TextOrHtml_;


/**
 * Returns HTML-escaped text as a SafeHtml object.
 *
 * If text is of a type that implements
 * {@code goog.i18n.bidi.DirectionalString}, the directionality of the new
 * {@code SafeHtml} object is set to {@code text}'s directionality, if known.
 * Otherwise, the directionality of the resulting SafeHtml is unknown (i.e.,
 * {@code null}).
 *
 * @param {!goog.html.SafeHtml.TextOrHtml_} textOrHtml The text to escape. If
 *     the parameter is of type SafeHtml it is returned directly (no escaping
 *     is done).
 * @return {!goog.html.SafeHtml} The escaped text, wrapped as a SafeHtml.
 */
goog.html.SafeHtml.htmlEscape = function(textOrHtml) {
  if (textOrHtml instanceof goog.html.SafeHtml) {
    return textOrHtml;
  }
  var dir = null;
  if (textOrHtml.implementsGoogI18nBidiDirectionalString) {
    dir = textOrHtml.getDirection();
  }
  var textAsString;
  if (textOrHtml.implementsGoogStringTypedString) {
    textAsString = textOrHtml.getTypedStringValue();
  } else {
    textAsString = String(textOrHtml);
  }
  return goog.html.SafeHtml.createSafeHtmlSecurityPrivateDoNotAccessOrElse_(
      goog.string.htmlEscape(textAsString), dir);
};


/**
 * Returns HTML-escaped text as a SafeHtml object, with newlines changed to
 * &lt;br&gt;.
 * @param {!goog.html.SafeHtml.TextOrHtml_} textOrHtml The text to escape. If
 *     the parameter is of type SafeHtml it is returned directly (no escaping
 *     is done).
 * @return {!goog.html.SafeHtml} The escaped text, wrapped as a SafeHtml.
 */
goog.html.SafeHtml.htmlEscapePreservingNewlines = function(textOrHtml) {
  if (textOrHtml instanceof goog.html.SafeHtml) {
    return textOrHtml;
  }
  var html = goog.html.SafeHtml.htmlEscape(textOrHtml);
  return goog.html.SafeHtml.createSafeHtmlSecurityPrivateDoNotAccessOrElse_(
      goog.string.newLineToBr(goog.html.SafeHtml.unwrap(html)),
      html.getDirection());
};


/**
 * Coerces an arbitrary object into a SafeHtml object.
 *
 * If {@code textOrHtml} is already of type {@code goog.html.SafeHtml}, the same
 * object is returned. Otherwise, {@code textOrHtml} is coerced to string, and
 * HTML-escaped. If {@code textOrHtml} is of a type that implements
 * {@code goog.i18n.bidi.DirectionalString}, its directionality, if known, is
 * preserved.
 *
 * @param {!goog.html.SafeHtml.TextOrHtml_} textOrHtml The text or SafeHtml to
 *     coerce.
 * @return {!goog.html.SafeHtml} The resulting SafeHtml object.
 * @deprecated Use goog.html.SafeHtml.htmlEscape.
 */
goog.html.SafeHtml.from = goog.html.SafeHtml.htmlEscape;


/**
 * @const
 * @private
 */
goog.html.SafeHtml.VALID_NAMES_IN_TAG_ = /^[a-zA-Z0-9-]+$/;


/**
 * Set of attributes containing URL as defined at
 * http://www.w3.org/TR/html5/index.html#attributes-1.
 * @const
 * @private
 */
goog.html.SafeHtml.URL_ATTRIBUTES_ = goog.object.createSet('action', 'cite',
    'data', 'formaction', 'href', 'manifest', 'poster', 'src');


// TODO(user): Perhaps add <template> used by Polymer?
/**
 * Set of tag names that are too dangerous.
 * @const
 * @private
 */
goog.html.SafeHtml.NOT_ALLOWED_TAG_NAMES_ = goog.object.createSet('link',
    'script', 'style');


/**
 * @private
 * @typedef {string|number|goog.string.Const|goog.html.SafeUrl|
 *     goog.html.SafeStyle|goog.html.SafeStyle.PropertyMap}
 */
goog.html.SafeHtml.AttributeValue_;


/**
 * Creates a SafeHtml content consisting of a tag with optional attributes and
 * optional content.
 *
 * For convenience tag names and attribute names are accepted as regular
 * strings, instead of goog.string.Const. Nevertheless, you should not pass
 * user-controlled values to these parameters. Note that these parameters are
 * syntactically validated at runtime, and invalid values will result in
 * an exception.
 *
 * Example usage:
 *
 * goog.html.SafeHtml.create('br');
 * goog.html.SafeHtml.create('div', {'class': 'a'});
 * goog.html.SafeHtml.create('p', {}, 'a');
 * goog.html.SafeHtml.create('p', {}, goog.html.SafeHtml.create('br'));
 *
 * goog.html.SafeHtml.create('span', {
 *   'style': {'margin': '0'}
 * });
 *
 * @param {string} tagName The name of the tag. Only tag names consisting of
 *     [a-zA-Z0-9-] are allowed. <link>, <script> and <style> tags are not
 *     supported.
 * @param {!Object.<string, goog.html.SafeHtml.AttributeValue_>=}
 *     opt_attributes Mapping from attribute names to their values. Only
 *     attribute names consisting of [a-zA-Z0-9-] are allowed. Attributes with
 *     a special meaning (e.g. on*) require goog.string.Const value, attributes
 *     containing URL require goog.string.Const or goog.html.SafeUrl. The
 *     "style" attribute accepts goog.html.SafeStyle or a map which will be
 *     passed to goog.html.SafeStyle.create. Value of null or undefined causes
 *     the attribute to be omitted. Values are HTML-escaped before usage.
 * @param {!goog.html.SafeHtml.TextOrHtml_|
 *     !Array.<!goog.html.SafeHtml.TextOrHtml_>=} opt_content Content to put
 *     inside the tag. This must be empty for void tags like <br>. Array
 *     elements are concatenated.
 * @return {!goog.html.SafeHtml} The SafeHtml content with the tag.
 * @throws {Error} If invalid tag name, attribute name, or attribute value is
 *     provided.
 * @throws {goog.asserts.AssertionError} If content for void tag is provided.
 */
goog.html.SafeHtml.create = function(tagName, opt_attributes, opt_content) {
  if (!goog.html.SafeHtml.VALID_NAMES_IN_TAG_.test(tagName)) {
    throw Error('Invalid tag name <' + tagName + '>.');
  }
  if (tagName.toLowerCase() in goog.html.SafeHtml.NOT_ALLOWED_TAG_NAMES_) {
    throw Error('Tag name <' + tagName + '> is not allowed for SafeHtml.');
  }
  var dir = null;
  var result = '<' + tagName;

  if (opt_attributes) {
    for (var name in opt_attributes) {
      if (!goog.html.SafeHtml.VALID_NAMES_IN_TAG_.test(name)) {
        throw Error('Invalid attribute name "' + name + '".');
      }
      var value = opt_attributes[name];
      if (value == null) {
        continue;
      }
      if (value instanceof goog.string.Const) {
        // If it's goog.string.Const, allow any valid attribute name.
        value = goog.string.Const.unwrap(value);
      } else if (name.toLowerCase() == 'style') {
        value = goog.html.SafeHtml.getStyleValue_(value);
      } else if (/^on/i.test(name)) {
        // TODO(user): Disallow more attributes with a special meaning.
        throw Error('Attribute "' + name +
            '" requires goog.string.Const value, "' + value + '" given.');
      } else if (value instanceof goog.html.SafeUrl) {
        // If it's goog.html.SafeUrl, allow any non-JavaScript attribute name.
        value = goog.html.SafeUrl.unwrap(value);
      } else if (name.toLowerCase() in goog.html.SafeHtml.URL_ATTRIBUTES_) {
        throw Error('Attribute "' + name +
            '" requires goog.string.Const or goog.html.SafeUrl value, "' +
            value + '" given.');
      }
      goog.asserts.assert(goog.isString(value) || goog.isNumber(value),
          'String or number value expected, got ' +
          (typeof value) + ' with value: ' + value);
      result += ' ' + name + '="' + goog.string.htmlEscape(String(value)) + '"';
    }
  }

  var content = opt_content;
  if (!goog.isDef(content)) {
    content = [];
  } else if (!goog.isArray(content)) {
    content = [content];
  }

  if (goog.dom.tags.isVoidTag(tagName.toLowerCase())) {
    goog.asserts.assert(!content.length,
        'Void tag <' + tagName + '> does not allow content.');
    result += '>';
  } else {
    var html = goog.html.SafeHtml.concat(content);
    result += '>' + goog.html.SafeHtml.unwrap(html) + '</' + tagName + '>';
    dir = html.getDirection();
  }

  var dirAttribute = opt_attributes && opt_attributes['dir'];
  if (dirAttribute) {
    if (/^(ltr|rtl|auto)$/i.test(dirAttribute)) {
      // If the tag has the "dir" attribute specified then its direction is
      // neutral because it can be safely used in any context.
      dir = goog.i18n.bidi.Dir.NEUTRAL;
    } else {
      dir = null;
    }
  }

  return goog.html.SafeHtml.createSafeHtmlSecurityPrivateDoNotAccessOrElse_(
      result, dir);
};


/**
 * Gets value allowed in "style" attribute.
 * @param {goog.html.SafeHtml.AttributeValue_} value It could be SafeStyle or a
 *     map which will be passed to goog.html.SafeStyle.create.
 * @return {string} Unwrapped value.
 * @throws {Error} If string value is given.
 * @private
 */
goog.html.SafeHtml.getStyleValue_ = function(value) {
  if (!goog.isObject(value)) {
    throw Error('The "style" attribute requires goog.html.SafeStyle or map ' +
        'of style properties, ' + (typeof value) + ' given: ' + value);
  }
  if (!(value instanceof goog.html.SafeStyle)) {
    // Process the property bag into a style object.
    value = goog.html.SafeStyle.create(value);
  }
  return goog.html.SafeStyle.unwrap(value);
};


/**
 * Creates a SafeHtml content with known directionality consisting of a tag with
 * optional attributes and optional content.
 * @param {!goog.i18n.bidi.Dir} dir Directionality.
 * @param {string} tagName
 * @param {!Object.<string, goog.html.SafeHtml.AttributeValue_>=} opt_attributes
 * @param {!goog.html.SafeHtml.TextOrHtml_|
 *     !Array.<!goog.html.SafeHtml.TextOrHtml_>=} opt_content
 * @return {!goog.html.SafeHtml} The SafeHtml content with the tag.
 */
goog.html.SafeHtml.createWithDir = function(dir, tagName, opt_attributes,
    opt_content) {
  var html = goog.html.SafeHtml.create(tagName, opt_attributes, opt_content);
  html.dir_ = dir;
  return html;
};


/**
 * Creates a new SafeHtml object by concatenating the values.
 * @param {...!goog.html.SafeHtml.TextOrHtml_|
 *     !Array.<!goog.html.SafeHtml.TextOrHtml_>} var_args Elements of array
 *     arguments would be processed recursively.
 * @return {!goog.html.SafeHtml}
 */
goog.html.SafeHtml.concat = function(var_args) {
  var dir = goog.i18n.bidi.Dir.NEUTRAL;
  var content = '';

  /**
   * @param {!goog.html.SafeHtml.TextOrHtml_|
   *     !Array.<!goog.html.SafeHtml.TextOrHtml_>} argument
   */
  var addArgument = function(argument) {
    if (goog.isArray(argument)) {
      goog.array.forEach(argument, addArgument);
    } else {
      var html = goog.html.SafeHtml.htmlEscape(argument);
      content += goog.html.SafeHtml.unwrap(html);
      var htmlDir = html.getDirection();
      if (dir == goog.i18n.bidi.Dir.NEUTRAL) {
        dir = htmlDir;
      } else if (htmlDir != goog.i18n.bidi.Dir.NEUTRAL && dir != htmlDir) {
        dir = null;
      }
    }
  };

  goog.array.forEach(arguments, addArgument);
  return goog.html.SafeHtml.createSafeHtmlSecurityPrivateDoNotAccessOrElse_(
      content, dir);
};


/**
 * Creates a new SafeHtml object with known directionality by concatenating the
 * values.
 * @param {!goog.i18n.bidi.Dir} dir Directionality.
 * @param {...!goog.html.SafeHtml.TextOrHtml_|
 *     !Array.<!goog.html.SafeHtml.TextOrHtml_>} var_args Elements of array
 *     arguments would be processed recursively.
 * @return {!goog.html.SafeHtml}
 */
goog.html.SafeHtml.concatWithDir = function(dir, var_args) {
  var html = goog.html.SafeHtml.concat(goog.array.slice(arguments, 1));
  html.dir_ = dir;
  return html;
};


/**
 * Type marker for the SafeHtml type, used to implement additional run-time
 * type checking.
 * @const
 * @private
 */
goog.html.SafeHtml.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ = {};


/**
 * Utility method to create SafeHtml instances.
 *
 * This function is considered "package private", i.e. calls (using "suppress
 * visibility") from other files within this package are considered acceptable.
 * DO NOT call this function from outside the goog.html package; use appropriate
 * wrappers instead.
 *
 * @param {string} html The string to initialize the SafeHtml object with.
 * @param {?goog.i18n.bidi.Dir} dir The directionality of the SafeHtml to be
 *     constructed, or null if unknown.
 * @return {!goog.html.SafeHtml} The initialized SafeHtml object.
 * @private
 */
goog.html.SafeHtml.createSafeHtmlSecurityPrivateDoNotAccessOrElse_ = function(
    html, dir) {
  var safeHtml = new goog.html.SafeHtml();
  safeHtml.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = html;
  safeHtml.dir_ = dir;
  return safeHtml;
};


/**
 * A SafeHtml instance corresponding to the empty string.
 * @const {!goog.html.SafeHtml}
 */
goog.html.SafeHtml.EMPTY =
    goog.html.SafeHtml.createSafeHtmlSecurityPrivateDoNotAccessOrElse_(
        '', goog.i18n.bidi.Dir.NEUTRAL);