1 <?php
  2 
  3 /**
  4  * Escapes a variable.
  5  *
  6  * This function is adapted from code coming from Twig.
  7  * (c) Fabien Potencier
  8  * https://github.com/twigphp/Twig/blob/69633fc19189699d20114f005efc8851c3fe9288/lib/Twig/Extension/Core.php#L900-L1127
  9  *
 10  * @package redaxo\core
 11  *
 12  * @param mixed  $value    The value to escape
 13  * @param string $strategy Supported strategies:
 14  *                         "html": escapes a string for the HTML context.
 15  *                         "html_attr": escapes a string for the HTML attrubute context. It is only necessary for dynamic attribute names and attribute values without quotes (`data-foo=bar`). For attribute values within quotes you can use default strategy "html".
 16  *                         "js": escapes a string for the JavaScript/JSON context.
 17  *                         "css": escapes a string for the CSS context. CSS escaping can be applied to any string being inserted into CSS and escapes everything except alphanumerics.
 18  *                         "url": escapes a string for the URI or parameter contexts. This should not be used to escape an entire URI; only a subcomponent being inserted.
 19  *
 20  * @throws InvalidArgumentException
 21  *
 22  * @return mixed
 23  */
 24 function rex_escape($value, $strategy = 'html')
 25 {
 26     if (!is_string($value)) {
 27         if (is_array($value)) {
 28             foreach ($value as $k => $v) {
 29                 $value[$k] = rex_escape($v, $strategy);
 30             }
 31 
 32             return $value;
 33         }
 34 
 35         if ($value instanceof \stdClass) {
 36             foreach (get_object_vars($value) as $k => $v) {
 37                 $value->$k = rex_escape($v, $strategy);
 38             }
 39 
 40             return $value;
 41         }
 42 
 43         if (is_object($value) && method_exists($value, '__toString')) {
 44             $value = (string) $value;
 45         } else {
 46             return $value;
 47         }
 48     }
 49 
 50     $string = $value;
 51 
 52     if ('' === $string) {
 53         return '';
 54     }
 55 
 56     switch ($strategy) {
 57         case 'html':
 58             // see https://secure.php.net/htmlspecialchars
 59             return htmlspecialchars($string, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
 60 
 61         case 'js':
 62             // escape all non-alphanumeric characters
 63             // into their \xHH or \uHHHH representations
 64 
 65             if (0 === strlen($string) ? false : 1 !== preg_match('/^./su', $string)) {
 66                 throw new InvalidArgumentException('The string to escape is not a valid UTF-8 string.');
 67             }
 68 
 69             $string = preg_replace_callback('#[^a-zA-Z0-9,\._]#Su', function ($matches) {
 70                 $char = $matches[0];
 71 
 72                 /*
 73                  * A few characters have short escape sequences in JSON and JavaScript.
 74                  * Escape sequences supported only by JavaScript, not JSON, are ommitted.
 75                  * \" is also supported but omitted, because the resulting string is not HTML safe.
 76                  */
 77                 static $shortMap = [
 78                     '\\' => '\\\\',
 79                     '/' => '\\/',
 80                     "\x08" => '\b',
 81                     "\x0C" => '\f',
 82                     "\x0A" => '\n',
 83                     "\x0D" => '\r',
 84                     "\x09" => '\t',
 85                 ];
 86 
 87                 if (isset($shortMap[$char])) {
 88                     return $shortMap[$char];
 89                 }
 90 
 91                 // \uHHHH
 92                 $char = mb_convert_encoding($char, 'UTF-16BE', 'UTF-8');
 93                 $char = strtoupper(bin2hex($char));
 94 
 95                 if (4 >= strlen($char)) {
 96                     return sprintf('\u%04s', $char);
 97                 }
 98 
 99                 return sprintf('\u%04s\u%04s', substr($char, 0, -4), substr($char, -4));
100             }, $string);
101 
102             return $string;
103 
104         case 'css':
105             if (!preg_match('//u', $string)) {
106                 throw new InvalidArgumentException('The string to escape is not a valid UTF-8 string.');
107             }
108 
109             $string = preg_replace_callback('#[^a-zA-Z0-9]#Su', function ($matches) {
110                 $char = $matches[0];
111 
112                 return sprintf('\\%X ', 1 === strlen($char) ? ord($char) : mb_ord($char, 'UTF-8'));
113             }, $string);
114 
115             return $string;
116 
117         case 'html_attr':
118             if (!preg_match('//u', $string)) {
119                 throw new InvalidArgumentException('The string to escape is not a valid UTF-8 string.');
120             }
121 
122             $string = preg_replace_callback('#[^a-zA-Z0-9,\.\-_]#Su', function ($matches) {
123                 /**
124                  * This function is adapted from code coming from Zend Framework.
125                  *
126                  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (https://www.zend.com)
127                  * @license   https://framework.zend.com/license/new-bsd New BSD License
128                  */
129                 $chr = $matches[0];
130                 $ord = ord($chr);
131 
132                 /*
133                  * The following replaces characters undefined in HTML with the
134                  * hex entity for the Unicode replacement character.
135                  */
136                 if (($ord <= 0x1f && "\t" != $chr && "\n" != $chr && "\r" != $chr) || ($ord >= 0x7f && $ord <= 0x9f)) {
137                     return '&#xFFFD;';
138                 }
139 
140                 /*
141                  * Check if the current character to escape has a name entity we should
142                  * replace it with while grabbing the hex value of the character.
143                  */
144                 if (1 === strlen($chr)) {
145                     /*
146                      * While HTML supports far more named entities, the lowest common denominator
147                      * has become HTML5's XML Serialisation which is restricted to the those named
148                      * entities that XML supports. Using HTML entities would result in this error:
149                      *     XML Parsing Error: undefined entity
150                      */
151                     static $entityMap = [
152                         34 => '&quot;', /* quotation mark */
153                         38 => '&amp;',  /* ampersand */
154                         60 => '&lt;',   /* less-than sign */
155                         62 => '&gt;',   /* greater-than sign */
156                     ];
157 
158                     if (isset($entityMap[$ord])) {
159                         return $entityMap[$ord];
160                     }
161 
162                     return sprintf('&#x%02X;', $ord);
163                 }
164 
165                 /*
166                  * Per OWASP recommendations, we'll use hex entities for any other
167                  * characters where a named entity does not exist.
168                  */
169                 return sprintf('&#x%04X;', mb_ord($chr, 'UTF-8'));
170             }, $string);
171 
172             return $string;
173 
174         case 'url':
175             return rawurlencode($string);
176 
177         default:
178             throw new InvalidArgumentException(sprintf('Invalid escaping strategy "%s" (valid ones: "html", "html_attr", "css", "js", "url").', $strategy));
179     }
180 }
181