1 <?php
2
/**
 * Escapes a value for safe output in a given context.
 *
 * This function is adapted from code coming from Twig.
 * (c) Fabien Potencier
 * https://github.com/twigphp/Twig/blob/69633fc19189699d20114f005efc8851c3fe9288/lib/Twig/Extension/Core.php#L900-L1127
 *
 * Handling by type of `$value`:
 *  - string: escaped according to `$strategy` (the empty string is returned as is).
 *  - array: every element is escaped recursively, keys are left untouched; an escaped copy is returned.
 *  - stdClass: every property value is escaped recursively on a clone; the passed object is not modified.
 *  - object with `__toString()`: cast to string, then escaped like a string.
 *  - anything else (int, float, bool, null, other objects, ...): returned unchanged.
 *
 * @package redaxo\core
 *
 * @param mixed  $value    The value to escape
 * @param string $strategy Supported strategies:
 *                         "html": escapes a string for the HTML context.
 *                         "html_attr": escapes a string for the HTML attribute context. It is only necessary for dynamic attribute names and attribute values without quotes (`data-foo=bar`). For attribute values within quotes you can use default strategy "html".
 *                         "js": escapes a string for the JavaScript/JSON context.
 *                         "css": escapes a string for the CSS context. CSS escaping can be applied to any string being inserted into CSS and escapes everything except alphanumerics.
 *                         "url": escapes a string for the URI or parameter contexts. This should not be used to escape an entire URI; only a subcomponent being inserted.
 *
 * @throws InvalidArgumentException if a non-empty string has to be escaped with an unknown strategy,
 *                                  or if the string is not valid UTF-8 (strategies "js", "css" and "html_attr")
 *
 * @return mixed The escaped value, of the same kind as the input (objects with `__toString()` become strings)
 */
function rex_escape($value, $strategy = 'html')
{
    if (!is_string($value)) {
        if (is_array($value)) {
            // Arrays are passed by value, so this only changes the local copy.
            foreach ($value as $k => $v) {
                $value[$k] = rex_escape($v, $strategy);
            }

            return $value;
        }

        if ($value instanceof \stdClass) {
            // Work on a clone: objects are passed by handle, and escaping the caller's
            // instance in place would lead to double escaping on a second call.
            $escaped = clone $value;
            foreach (get_object_vars($value) as $k => $v) {
                $escaped->$k = rex_escape($v, $strategy);
            }

            return $escaped;
        }

        if (!is_object($value) || !method_exists($value, '__toString')) {
            // Nothing to escape (numbers, booleans, null, non-stringable objects, ...).
            return $value;
        }

        $value = (string) $value;
    }

    $string = $value;

    if ('' === $string) {
        return '';
    }

    switch ($strategy) {
        case 'html':
            // see https://secure.php.net/htmlspecialchars
            return htmlspecialchars($string, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        case 'js':
            // escape all non-alphanumeric characters
            // into their \xHH or \uHHHH representations

            // With the "u" modifier PCRE validates the whole subject; anything but 1 means invalid UTF-8.
            if (1 !== preg_match('//u', $string)) {
                throw new InvalidArgumentException('The string to escape is not a valid UTF-8 string.');
            }

            return preg_replace_callback('#[^a-zA-Z0-9,\._]#Su', static function ($matches) {
                $char = $matches[0];

                /*
                 * A few characters have short escape sequences in JSON and JavaScript.
                 * Escape sequences supported only by JavaScript, not JSON, are omitted.
                 * \" is also supported but omitted, because the resulting string is not HTML safe.
                 */
                static $shortMap = [
                    '\\' => '\\\\',
                    '/' => '\\/',
                    "\x08" => '\b',
                    "\x0C" => '\f',
                    "\x0A" => '\n',
                    "\x0D" => '\r',
                    "\x09" => '\t',
                ];

                if (isset($shortMap[$char])) {
                    return $shortMap[$char];
                }

                // \uHHHH
                $char = mb_convert_encoding($char, 'UTF-16BE', 'UTF-8');
                $char = strtoupper(bin2hex($char));

                if (4 >= strlen($char)) {
                    return sprintf('\u%04s', $char);
                }

                // More than 4 hex digits: a UTF-16 surrogate pair, emitted as two \uHHHH units.
                return sprintf('\u%04s\u%04s', substr($char, 0, -4), substr($char, -4));
            }, $string);

        case 'css':
            if (1 !== preg_match('//u', $string)) {
                throw new InvalidArgumentException('The string to escape is not a valid UTF-8 string.');
            }

            // Every non-alphanumeric character becomes "\HEX " (the trailing space terminates the escape).
            return preg_replace_callback('#[^a-zA-Z0-9]#Su', static function ($matches) {
                $char = $matches[0];

                return sprintf('\\%X ', 1 === strlen($char) ? ord($char) : mb_ord($char, 'UTF-8'));
            }, $string);

        case 'html_attr':
            if (1 !== preg_match('//u', $string)) {
                throw new InvalidArgumentException('The string to escape is not a valid UTF-8 string.');
            }

            return preg_replace_callback('#[^a-zA-Z0-9,\.\-_]#Su', static function ($matches) {
                /**
                 * This function is adapted from code coming from Zend Framework.
                 *
                 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (https://www.zend.com)
                 * @license https://framework.zend.com/license/new-bsd New BSD License
                 */
                $chr = $matches[0];
                // Value of the first byte only; for a multi-byte character this is its UTF-8 lead byte.
                $ord = ord($chr);

                /*
                 * The following replaces characters undefined in HTML with the
                 * hex entity for the Unicode replacement character.
                 */
                if (($ord <= 0x1f && "\t" !== $chr && "\n" !== $chr && "\r" !== $chr) || ($ord >= 0x7f && $ord <= 0x9f)) {
                    return '&#xFFFD;';
                }

                /*
                 * Check if the current character to escape has a name entity we should
                 * replace it with while grabbing the hex value of the character.
                 */
                if (1 === strlen($chr)) {
                    /*
                     * While HTML supports far more named entities, the lowest common denominator
                     * has become HTML5's XML Serialisation which is restricted to the those named
                     * entities that XML supports. Using HTML entities would result in this error:
                     *     XML Parsing Error: undefined entity
                     */
                    static $entityMap = [
                        34 => '&quot;', /* quotation mark */
                        38 => '&amp;', /* ampersand */
                        60 => '&lt;', /* less-than sign */
                        62 => '&gt;', /* greater-than sign */
                    ];

                    if (isset($entityMap[$ord])) {
                        return $entityMap[$ord];
                    }

                    return sprintf('&#x%02X;', $ord);
                }

                /*
                 * Per OWASP recommendations, we'll use hex entities for any other
                 * characters where a named entity does not exist.
                 */
                return sprintf('&#x%04X;', mb_ord($chr, 'UTF-8'));
            }, $string);

        case 'url':
            return rawurlencode($string);

        default:
            throw new InvalidArgumentException(sprintf('Invalid escaping strategy "%s" (valid ones: "html", "html_attr", "css", "js", "url").', $strategy));
    }
}
181