source: trunk/www.guidonia.net/wp/wp-includes/kses.php@ 44

Last change on this file since 44 was 44, checked in by luciano, 14 years ago
File size: 33.7 KB
Line 
1<?php
2/**
3 * HTML/XHTML filter that only allows some elements and attributes
4 *
5 * Added wp_ prefix to avoid conflicts with existing kses users
6 *
7 * @version 0.2.2
8 * @copyright (C) 2002, 2003, 2005
9 * @author Ulf Harnhammar <metaur@users.sourceforge.net>
10 *
11 * @package External
12 * @subpackage KSES
13 *
14 * @internal
15 * *** CONTACT INFORMATION ***
16 * E-mail: metaur at users dot sourceforge dot net
17 * Web page: http://sourceforge.net/projects/kses
18 * Paper mail: Ulf Harnhammar
19 * Ymergatan 17 C
20 * 753 25 Uppsala
21 * SWEDEN
22 *
23 * [kses strips evil scripts!]
24 */
25
/**
 * Switch that lets a site supply its own kses tag lists.
 *
 * Define CUSTOM_TAGS as true in a plugin file (or in the deprecated
 * my-hacks.php file) before this file is loaded; the default lists below are
 * then not defined here.
 *
 * @since 1.2.0
 */
if (!defined('CUSTOM_TAGS'))
	define('CUSTOM_TAGS', false);

if (!CUSTOM_TAGS) {
	/**
	 * Kses global for default allowable HTML tags.
	 *
	 * Maps each allowed element name to its allowed attribute names; every
	 * attribute maps to an empty array, i.e. no per-attribute checks.
	 * Can be overridden by using the CUSTOM_TAGS constant.
	 *
	 * @global array $allowedposttags
	 * @since 2.0.0
	 */
	$allowedposttags = array(
		'address' => array(),
		'a' => array_fill_keys(array('class', 'href', 'id', 'title', 'rel', 'rev', 'name', 'target'), array()),
		'abbr' => array_fill_keys(array('class', 'title'), array()),
		'acronym' => array_fill_keys(array('title'), array()),
		'b' => array(),
		'big' => array(),
		'blockquote' => array_fill_keys(array('id', 'cite', 'class', 'lang', 'xml:lang'), array()),
		'br' => array_fill_keys(array('class'), array()),
		'button' => array_fill_keys(array('disabled', 'name', 'type', 'value'), array()),
		'caption' => array_fill_keys(array('align', 'class'), array()),
		'cite' => array_fill_keys(array('class', 'dir', 'lang', 'title'), array()),
		'code' => array_fill_keys(array('style'), array()),
		'col' => array_fill_keys(array('align', 'char', 'charoff', 'span', 'dir', 'style', 'valign', 'width'), array()),
		'del' => array_fill_keys(array('datetime'), array()),
		'dd' => array(),
		'div' => array_fill_keys(array('align', 'class', 'dir', 'lang', 'style', 'xml:lang'), array()),
		'dl' => array(),
		'dt' => array(),
		'em' => array(),
		'fieldset' => array(),
		'font' => array_fill_keys(array('color', 'face', 'size'), array()),
		'form' => array_fill_keys(array('action', 'accept', 'accept-charset', 'enctype', 'method', 'name', 'target'), array()),
		'h1' => array_fill_keys(array('align', 'class', 'id', 'style'), array()),
		'h2' => array_fill_keys(array('align', 'class', 'id', 'style'), array()),
		'h3' => array_fill_keys(array('align', 'class', 'id', 'style'), array()),
		'h4' => array_fill_keys(array('align', 'class', 'id', 'style'), array()),
		'h5' => array_fill_keys(array('align', 'class', 'id', 'style'), array()),
		'h6' => array_fill_keys(array('align', 'class', 'id', 'style'), array()),
		'hr' => array_fill_keys(array('align', 'class', 'noshade', 'size', 'width'), array()),
		'i' => array(),
		'img' => array_fill_keys(array('alt', 'align', 'border', 'class', 'height', 'hspace', 'longdesc', 'vspace', 'src', 'style', 'width'), array()),
		'ins' => array_fill_keys(array('datetime', 'cite'), array()),
		'kbd' => array(),
		'label' => array_fill_keys(array('for'), array()),
		'legend' => array_fill_keys(array('align'), array()),
		'li' => array_fill_keys(array('align', 'class'), array()),
		'p' => array_fill_keys(array('class', 'align', 'dir', 'lang', 'style', 'xml:lang'), array()),
		'pre' => array_fill_keys(array('style', 'width'), array()),
		'q' => array_fill_keys(array('cite'), array()),
		's' => array(),
		'span' => array_fill_keys(array('class', 'dir', 'align', 'lang', 'style', 'title', 'xml:lang'), array()),
		'strike' => array(),
		'strong' => array(),
		'sub' => array(),
		'sup' => array(),
		'table' => array_fill_keys(array('align', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'class', 'dir', 'id', 'rules', 'style', 'summary', 'width'), array()),
		'tbody' => array_fill_keys(array('align', 'char', 'charoff', 'valign'), array()),
		'td' => array_fill_keys(array('abbr', 'align', 'axis', 'bgcolor', 'char', 'charoff', 'class', 'colspan', 'dir', 'headers', 'height', 'nowrap', 'rowspan', 'scope', 'style', 'valign', 'width'), array()),
		'textarea' => array_fill_keys(array('cols', 'rows', 'disabled', 'name', 'readonly'), array()),
		'tfoot' => array_fill_keys(array('align', 'char', 'class', 'charoff', 'valign'), array()),
		'th' => array_fill_keys(array('abbr', 'align', 'axis', 'bgcolor', 'char', 'charoff', 'class', 'colspan', 'headers', 'height', 'nowrap', 'rowspan', 'scope', 'valign', 'width'), array()),
		'thead' => array_fill_keys(array('align', 'char', 'charoff', 'class', 'valign'), array()),
		'title' => array(),
		'tr' => array_fill_keys(array('align', 'bgcolor', 'char', 'charoff', 'class', 'style', 'valign'), array()),
		'tt' => array(),
		'u' => array(),
		'ul' => array_fill_keys(array('class', 'style', 'type'), array()),
		'ol' => array_fill_keys(array('class', 'start', 'style', 'type'), array()),
		'var' => array(),
	);

	/**
	 * Kses allowed HTML elements.
	 *
	 * The smaller list, passed to wp_kses() by wp_filter_kses(). Not included
	 * here: br, dd, dl, dt, ins, li, ol, p, sub, sup, u and ul.
	 *
	 * @global array $allowedtags
	 * @since 1.0.0
	 */
	$allowedtags = array(
		'a' => array_fill_keys(array('href', 'title'), array()),
		'abbr' => array_fill_keys(array('title'), array()),
		'acronym' => array_fill_keys(array('title'), array()),
		'b' => array(),
		'blockquote' => array_fill_keys(array('cite'), array()),
		'cite' => array(),
		'code' => array(),
		'del' => array_fill_keys(array('datetime'), array()),
		'em' => array(),
		'i' => array(),
		'q' => array_fill_keys(array('cite'), array()),
		'strike' => array(),
		'strong' => array(),
	);
}
337
/**
 * Filters content and keeps only allowable HTML elements.
 *
 * Runs $string through the kses pipeline: NULL removal, removal of "&{...}"
 * JavaScript entities, entity normalization, the 'pre_kses' hook and finally
 * the tag/attribute filter. You have to remove any slashes from PHP's magic
 * quotes before you call this function.
 *
 * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto',
 * 'news', 'irc', 'gopher', 'nntp', 'feed' and 'telnet'. 'javascript' is not in
 * that list and should not be allowed for untrusted users.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Optional. Allowed protocol in links.
 * @return string Filtered content with only allowed HTML elements
 */
function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) {
	// Clean-up passes that run before any tag parsing, innermost call first.
	$string = wp_kses_normalize_entities(wp_kses_js_entities(wp_kses_no_null($string)));

	// Element and attribute names are looked up in lower case later on.
	$lowercased_html = wp_kses_array_lc($allowed_html);

	// WP changed the order of these funcs and added args to wp_kses_hook
	$string = wp_kses_hook($string, $lowercased_html, $allowed_protocols);

	return wp_kses_split($string, $lowercased_html, $allowed_protocols);
}
366
/**
 * You add any kses hooks here.
 *
 * There is currently only one kses WordPress hook, 'pre_kses', and it is
 * called here. All parameters are passed to the hook, and the hook is expected
 * to return a string.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter through kses
 * @param array $allowed_html List of allowed HTML elements
 * @param array $allowed_protocols Allowed protocol in links
 * @return string Filtered content through 'pre_kses' hook
 */
function wp_kses_hook($string, $allowed_html, $allowed_protocols) {
	return apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols);
}
384
/**
 * Reports the version of the bundled kses library.
 *
 * @since 1.0.0
 *
 * @return string KSES Version Number
 */
function wp_kses_version() {
	$version = '0.2.2';

	return $version;
}
395
/**
 * Searches for HTML tags, no matter how malformed.
 *
 * It also matches stray ">" characters. Every match is handed to
 * wp_kses_split2() and replaced by whatever that function returns.
 *
 * The whitelist and protocol list reach the callback through the globals
 * $pass_allowed_html and $pass_allowed_protocols, which this function
 * overwrites on every call.
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Content with fixed HTML tags
 */
function wp_kses_split($string, $allowed_html, $allowed_protocols) {
	global $pass_allowed_html, $pass_allowed_protocols;
	$pass_allowed_html = $allowed_html;
	$pass_allowed_protocols = $allowed_protocols;

	// A named callback replaces create_function(), which was removed in
	// PHP 8.0 and defined a brand-new function on every call.
	return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%', '_wp_kses_split_callback', $string);
}

/**
 * Callback for wp_kses_split().
 *
 * Forwards the matched text to wp_kses_split2() together with the whitelist
 * and protocol list that wp_kses_split() stored in globals.
 *
 * @access private
 *
 * @param array $match preg_replace_callback() matches array
 * @return string Replacement for the matched text
 */
function _wp_kses_split_callback($match) {
	global $pass_allowed_html, $pass_allowed_protocols;

	return wp_kses_split2($match[1], $pass_allowed_html, $pass_allowed_protocols);
}
415
/**
 * Callback for wp_kses_split for fixing malformed HTML tags.
 *
 * Receives one match from wp_kses_split() and returns its replacement:
 * - a stray ">" becomes "&gt;";
 * - an HTML comment is kept, with its content filtered by wp_kses() and runs
 *   of dashes collapsed;
 * - very malformed tags such as <:::> and elements that are not in
 *   $allowed_html become an empty string (look ma, no strip_tags()!);
 * - a closing tag is returned without any attributes;
 * - any other allowed tag is passed on to wp_kses_attr(), which removes
 *   illegal attributes.
 *
 * @access private
 * @since 1.0.0
 * @uses wp_kses_attr()
 *
 * @param string $string Content to filter
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Fixed HTML element
 */
function wp_kses_split2($string, $allowed_html, $allowed_protocols) {
	$string = wp_kses_stripslashes($string);

	// Anything that does not open with "<" is the lone ">" the splitter matched.
	if (substr($string, 0, 1) != '<') {
		return '&gt;';
	}

	// Allow HTML comments, but filter what is inside them.
	if (preg_match('%^<!--(.*?)(-->)?$%', $string, $comment)) {
		$body = str_replace(array('<!--', '-->'), '', $comment[1]);

		// Keep filtering until another pass no longer changes the text.
		while (true) {
			$filtered = wp_kses($body, $allowed_html, $allowed_protocols);
			if ($body == $filtered) {
				break;
			}
			$body = $filtered;
		}

		if ($body == '') {
			return '';
		}

		// prevent multiple dashes in comments
		$body = preg_replace('/--+/', '-', $body);
		// prevent three dashes closing a comment
		$body = preg_replace('/-$/', '', $body);

		return "<!--{$body}-->";
	}

	// It's seriously malformed if this does not match.
	if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $parts)) {
		return '';
	}

	$closing = trim($parts[1]);
	$tag_name = $parts[2];
	$attributes = $parts[3];

	// They are using a not allowed HTML element.
	if (!isset($allowed_html[strtolower($tag_name)])) {
		return '';
	}

	// No attributes are allowed for closing elements.
	if ($closing != '') {
		return "<$closing$tag_name>";
	}

	// $closing is empty from here on, so the element is just the tag name.
	return wp_kses_attr($tag_name, $attributes, $allowed_html, $allowed_protocols);
}
476
/**
 * Removes all attributes, if none are allowed for this element.
 *
 * If some are allowed it calls wp_kses_hair() to split them further, and then
 * it builds up new HTML code from the data that wp_kses_hair() returns. It also
 * removes "<" and ">" characters, if there are any left. One more thing it does
 * is to check if the tag has a closing XHTML slash, and if it does, it puts one
 * in the returned code as well.
 *
 * The value of a "style" attribute, in any letter case, is additionally passed
 * through safecss_filter_attr(); the attribute is dropped when that returns an
 * empty value.
 *
 * @since 1.0.0
 *
 * @param string $element HTML element/tag
 * @param string $attr HTML attributes from HTML element to closing HTML element tag
 * @param array $allowed_html Allowed HTML elements
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Sanitized HTML element
 */
function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) {
	// Is there a closing XHTML slash at the end of the attributes?
	$xhtml_slash = preg_match('%\s/\s*$%', $attr) ? ' /' : '';

	// Are any attributes allowed at all for this element? Checked explicitly
	// rather than by silencing count() with the @ operator.
	$element_low = strtolower($element);
	$allowed_attrs = isset($allowed_html[$element_low]) ? $allowed_html[$element_low] : null;
	if (!is_array($allowed_attrs) || count($allowed_attrs) == 0)
		return "<$element$xhtml_slash>";

	// Go through the parsed attributes and collect the allowed ones in $attr2.
	$attr2 = '';

	foreach (wp_kses_hair($attr, $allowed_protocols) as $arreach) {
		$name_low = strtolower($arreach['name']);

		if (!isset($allowed_attrs[$name_low]))
			continue; // the attribute is not allowed

		$current = $allowed_attrs[$name_low];
		if ($current == '')
			continue; // the attribute is not allowed

		if (!is_array($current)) {
			// there are no checks
			$attr2 .= ' '.$arreach['whole'];
			continue;
		}

		// there are some checks; the first failing one rejects the attribute
		$ok = true;
		foreach ($current as $currkey => $currval) {
			if (!wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) {
				$ok = false;
				break;
			}
		}

		// The whitelist lookup above is case-insensitive, so this test has to
		// be as well; otherwise STYLE="..." would skip the CSS filter.
		if ($name_low === 'style') {
			$orig_value = $arreach['value'];

			$value = safecss_filter_attr($orig_value);

			if (empty($value))
				continue;

			$arreach['value'] = $value;
			$arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']);
		}

		if ($ok)
			$attr2 .= ' '.$arreach['whole']; // it passed them
	}

	// Remove any "<" or ">" characters
	$attr2 = preg_replace('/[<>]/', '', $attr2);

	return "<$element$attr2$xhtml_slash>";
}
560
/**
 * Builds an attribute list from string containing attributes.
 *
 * This function does a lot of work. It parses an attribute list into an array
 * with attribute data, and tries to do the right thing even if it gets weird
 * input. It will add quotes around attribute values that don't have any quotes
 * or apostrophes around them, to make it easier to produce HTML code that will
 * conform to W3C's HTML specification. It will also remove bad URL protocols
 * from the values of URI attributes (href, src, ...), whatever the letter case
 * of the attribute name. It also reduces duplicate attributes by using the
 * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 *
 * Each entry of the result is keyed by the attribute name as written and holds
 * 'name', 'value', 'whole' (the rebuilt name=value text) and 'vless' ('y' for
 * a valueless attribute, 'n' otherwise).
 *
 * @since 1.0.0
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag
 * @param array $allowed_protocols Allowed protocols to keep
 * @return array List of attributes after parsing
 */
function wp_kses_hair($attr, $allowed_protocols) {
	$attrarr = array ();
	$mode = 0;
	$attrname = '';
	$uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');

	// Accepted value syntaxes, tried in this order, each with the quote
	// character used when the attribute is rebuilt: "value", 'value', value.
	$value_patterns = array(
		array('/^"([^"]*)"(\s+|$)/', '"'),
		array("/^'([^']*)'(\s+|$)/", "'"),
		array("%^([^\s\"']+)(\s+|$)%", '"'), // quotes are added to conform to W3C's HTML spec
	);

	// Loop through the whole attribute list
	while (strlen($attr) != 0) {
		$working = 0; // Was the last operation successful?

		switch ($mode) {
			case 0 : // attribute name, href for instance
				if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
					$attrname = $match[1];
					$working = $mode = 1;
					$attr = (string) substr($attr, strlen($match[0]));
				}
				break;

			case 1 : // equals sign or valueless ("selected")
				if (preg_match('/^\s*=\s*/', $attr, $match)) { // equals sign
					$working = 1;
					$mode = 2;
					$attr = (string) substr($attr, strlen($match[0]));
					break;
				}

				if (preg_match('/^\s+/', $attr, $match)) { // valueless
					$working = 1;
					$mode = 0;
					if (!array_key_exists($attrname, $attrarr))
						$attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
					$attr = (string) substr($attr, strlen($match[0]));
				}
				break;

			case 2 : // attribute value, a URL after href= for instance
				foreach ($value_patterns as $value_pattern) {
					list($pattern, $quote) = $value_pattern;

					if (preg_match($pattern, $attr, $match)) {
						$thisval = $match[1];

						// Attribute names are case-insensitive in HTML, so the
						// lowercased name is compared; otherwise
						// HREF="javascript:..." would skip the protocol filter.
						if (in_array(strtolower($attrname), $uris, true))
							$thisval = wp_kses_bad_protocol($thisval, $allowed_protocols);

						// The first definition of an attribute wins.
						if (!array_key_exists($attrname, $attrarr))
							$attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => $attrname.'='.$quote.$thisval.$quote, 'vless' => 'n');

						$working = 1;
						$mode = 0;
						$attr = (string) substr($attr, strlen($match[0]));
						break; // leaves the foreach only
					}
				}
				break;
		}

		if ($working == 0) { // not well formed, remove and try again
			$attr = wp_kses_html_error($attr);
			$mode = 0;
		}
	}

	// special case, for when the attribute list ends with a valueless
	// attribute like "selected"
	if ($mode == 1 && !array_key_exists($attrname, $attrarr))
		$attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');

	return $attrarr;
}
689
/**
 * Performs different checks for attribute values.
 *
 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
 * and "valueless". A check name that is not recognised always passes.
 *
 * @since 1.0.0
 *
 * @param string $value Attribute value
 * @param string $vless Whether the value is valueless or not. Use 'y' or 'n'
 * @param string $checkname What $checkvalue is checking for.
 * @param mixed $checkvalue What constraint the value should pass
 * @return bool Whether check passes (true) or not (false)
 */
function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) {
	switch (strtolower($checkname)) {
		case 'maxlen' :
			// The value must not be longer than $checkvalue. This can be used
			// to avoid Buffer Overflows in WWW clients and Internet servers.
			return !(strlen($value) > $checkvalue);

		case 'minlen' :
			// The value must not be shorter than $checkvalue.
			return !(strlen($value) < $checkvalue);

		case 'maxval' :
			// The value must be an integer from 0 and up, written without an
			// excessive amount of zeroes or whitespace, and must not exceed
			// $checkvalue. Can be used to avoid Denial of Service attacks.
			return preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value) && !($value > $checkvalue);

		case 'minval' :
			// Same integer format as maxval; must not be below $checkvalue.
			return preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value) && !($value < $checkvalue);

		case 'valueless' :
			// "y"/"Y": the attribute must not have a value (<option selected>).
			// "n"/"N": the attribute must have one (<a href="blah">).
			return strtolower($checkvalue) == $vless;
	}

	return true;
}
761
/**
 * Sanitize string from bad protocols.
 *
 * This function removes all non-allowed protocols from the beginning of
 * $string. NULL characters and soft hyphens (\xad) are removed first. The
 * protocol check is then repeated until the string stops changing, so it won't
 * be fooled by a string like "javascript:javascript:alert(57)".
 *
 * @since 1.0.0
 *
 * @param string $string Content to filter bad protocols from
 * @param array $allowed_protocols Allowed protocols to keep
 * @return string Filtered content
 */
function wp_kses_bad_protocol($string, $allowed_protocols) {
	$string = wp_kses_no_null($string);
	$string = preg_replace('/\xad+/', '', $string); // deals with Opera "feature"

	// Always run at least one pass, then repeat while a pass changes something.
	do {
		$previous = $string;
		$string = wp_kses_bad_protocol_once($previous, $allowed_protocols);
	} while ($string != $previous);

	return $string;
}
788
/**
 * Removes any NULL characters in $string.
 *
 * Two forms are removed, in this order: real NUL bytes, and the two-character
 * sequence of a backslash followed by a zero.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_no_null($string) {
	// preg_replace() applies an array of patterns one after the other.
	$null_patterns = array('/\0+/', '/(\\\\0)+/');

	return preg_replace($null_patterns, '', $string);
}
803
/**
 * Strips slashes from in front of quotes.
 *
 * This function changes the character sequence \" to just ". It leaves all
 * other slashes alone.
 *
 * @since 1.0.0
 *
 * @param string $string String to strip slashes
 * @return string Fixed strings with quoted slashes
 */
function wp_kses_stripslashes($string) {
	// Matches a backslash immediately followed by a double quote.
	$escaped_quote = '%\\\\"%';

	return preg_replace($escaped_quote, '"', $string);
}
819
/**
 * Goes through an array and changes the keys to all lower case.
 *
 * Both levels are handled: the outer keys (element names) and the keys of each
 * inner array (attribute names). Inner values are copied as they are.
 *
 * @since 1.0.0
 *
 * @param array $inarray Unfiltered array
 * @return array Fixed array with all lowercase keys
 */
function wp_kses_array_lc($inarray) {
	$lowercased = array ();

	foreach ( (array) $inarray as $element => $attributes) {
		$lowercased[strtolower($element)] = array_change_key_case( (array) $attributes, CASE_LOWER);
	}

	return $lowercased;
}
843
/**
 * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 *
 * These have the form "&{...};". An entity that is never closed is removed up
 * to the end of the string.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_js_entities($string) {
	$js_entity = '%&\s*\{[^}]*(\}\s*;?|$)%';

	return preg_replace($js_entity, '', $string);
}
855
/**
 * Handles parsing errors in wp_kses_hair().
 *
 * The general plan is to remove everything up to and including some
 * whitespace; quoted and apostrophe-delimited runs are skipped as a unit, so
 * whitespace inside them does not end the removal.
 *
 * @since 1.0.0
 *
 * @param string $string
 * @return string
 */
function wp_kses_html_error($string) {
	$bad_chunk = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

	return preg_replace($bad_chunk, '', $string);
}
870
/**
 * Sanitizes content from bad protocols and other characters.
 *
 * This function searches for URL protocols at the beginning of $string, while
 * handling whitespace and HTML entities. The text in front of the first colon
 * (":", "&#58;" or "&#x3a;") is checked by wp_kses_bad_protocol_once2().
 *
 * The allowed protocols are handed to that callback through the global
 * $_kses_allowed_protocols, which is overwritten on every call.
 *
 * @since 1.0.0
 *
 * @param string $string Content to check for bad protocols
 * @param array $allowed_protocols Allowed protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once($string, $allowed_protocols) {
	global $_kses_allowed_protocols;
	$_kses_allowed_protocols = $allowed_protocols;

	$pieces = preg_split('/:|&#58;|&#x3a;/i', $string, 2);
	$has_colon = isset($pieces[1]);

	// A colon was found and the text before it does not contain "/?".
	if ( $has_colon && !preg_match('%/\?%', $pieces[0]) )
		return wp_kses_bad_protocol_once2($pieces[0]) . trim($pieces[1]);

	return preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|&#58;|&#[Xx]3[Aa];)\s*/', 'wp_kses_bad_protocol_once2', $string);
}
895
/**
 * Callback for wp_kses_bad_protocol_once() regular expression.
 *
 * This function processes URL protocols, checks to see if they're in the
 * white-list or not, and returns different data depending on the answer: the
 * normalized protocol followed by a colon when it is allowed, an empty string
 * otherwise. The white-list is read from the global $_kses_allowed_protocols.
 *
 * @access private
 * @since 1.0.0
 *
 * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols
 * @return string Sanitized content
 */
function wp_kses_bad_protocol_once2($matches) {
	global $_kses_allowed_protocols;

	if ( is_array($matches) ) {
		// Called by preg_replace_callback(): the protocol is in group 1.
		if ( empty($matches[1]) )
			return '';

		$candidate = $matches[1];
	} else {
		$candidate = $matches;
	}

	// Normalize before comparing: decode numeric entities, drop whitespace,
	// NULLs and soft hyphens, then lower-case.
	$protocol = wp_kses_decode_entities($candidate);
	$protocol = preg_replace('/\s/', '', $protocol);
	$protocol = wp_kses_no_null($protocol);
	$protocol = preg_replace('/\xad+/', '', $protocol); // deals with Opera "feature"
	$protocol = strtolower($protocol);

	foreach ( (array) $_kses_allowed_protocols as $one_protocol) {
		if (strtolower($one_protocol) == $protocol)
			return "{$protocol}:";
	}

	return '';
}
939
/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. It will convert "AT&T" to the correct
 * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 *
 * @since 1.0.0
 *
 * @param string $string Content to normalize entities
 * @return string Content with normalized entities
 */
function wp_kses_normalize_entities($string) {
	// Disarm all entities by converting & to &amp;
	$disarmed = str_replace('&', '&amp;', $string);

	// Change back the allowed entities in our entity whitelist: named ones
	// first, then decimal and hexadecimal ones through their callbacks.
	$named = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $disarmed);
	$decimal = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $named);

	return preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $decimal);
}
964
/**
 * Callback for wp_kses_normalize_entities() regular expression.
 *
 * This function helps wp_kses_normalize_entities() to only accept 16 bit values
 * and nothing more for &#number; entities. Accepted entities are re-armed as
 * "&#number;", rejected ones stay disarmed as "&amp;#number;". An empty
 * number (including "0") yields an empty string.
 *
 * @access private
 * @since 1.0.0
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_normalize_entities2($matches) {
	if ( empty($matches[1]) )
		return '';

	$i = $matches[1];

	if ( ! valid_unicode($i) || $i > 65535 )
		return "&amp;#$i;";

	return "&#$i;";
}
984
/**
 * Callback for wp_kses_normalize_entities() for regular expression.
 *
 * This function helps wp_kses_normalize_entities() to only accept valid Unicode
 * numeric entities in hex form. Accepted entities are re-armed as "&#x...;",
 * rejected ones stay disarmed as "&amp;#x...;".
 *
 * @access private
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity
 */
function wp_kses_normalize_entities3($matches) {
	if ( empty($matches[2]) )
		return '';

	$hexchars = $matches[2];

	if ( valid_unicode(hexdec($hexchars)) )
		return "&#x$hexchars;";

	return "&amp;#x$hexchars;";
}
1003
/**
 * Helper function to determine if a Unicode value is valid.
 *
 * Accepted values are tab, line feed and carriage return, plus the ranges
 * 0x20-0xd7ff, 0xe000-0xfffd and 0x10000-0x10ffff.
 *
 * @param int $i Unicode value
 * @return bool true if the value was a valid Unicode number
 */
function valid_unicode($i) {
	// The three permitted control characters.
	if ( $i == 0x9 || $i == 0xa || $i == 0xd )
		return true;

	if ( $i >= 0x20 && $i <= 0xd7ff )
		return true;

	if ( $i >= 0xe000 && $i <= 0xfffd )
		return true;

	return ( $i >= 0x10000 && $i <= 0x10ffff );
}
1016
/**
 * Convert all entities to their character counterparts.
 *
 * This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't do
 * anything with other entities like &auml;, but we don't need them in the URL
 * protocol whitelisting system anyway.
 *
 * @since 1.0.0
 *
 * @param string $string Content to change entities
 * @return string Content after decoded entities
 */
function wp_kses_decode_entities($string) {
	// Named callbacks replace create_function(), which was removed in PHP 8.0
	// and defined a brand-new function on every call.
	$string = preg_replace_callback('/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $string);
	$string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $string);

	return $string;
}

/**
 * Callback for wp_kses_decode_entities(): decodes a decimal entity.
 *
 * @access private
 *
 * @param array $match preg_replace_callback() matches array
 * @return string The character for the matched number
 */
function _wp_kses_decode_entities_chr($match) {
	// Cast and mask so chr() always receives an integer from 0 to 255, however
	// long the digit string is; only the low byte is kept.
	return chr(((int) $match[1]) & 0xFF);
}

/**
 * Callback for wp_kses_decode_entities(): decodes a hexadecimal entity.
 *
 * @access private
 *
 * @param array $match preg_replace_callback() matches array
 * @return string The character for the matched number
 */
function _wp_kses_decode_entities_chr_hexdec($match) {
	// hexdec() returns a float for very long digit strings, hence the cast.
	return chr(((int) hexdec($match[1])) & 0xFF);
}
1035
/**
 * Sanitize content with allowed HTML Kses rules.
 *
 * Expects slashed data: slashes are stripped before filtering and added back
 * afterwards.
 *
 * @since 1.0.0
 * @uses $allowedtags
 *
 * @param string $data Content to filter
 * @return string Filtered content
 */
function wp_filter_kses($data) {
	global $allowedtags;

	$unslashed = stripslashes( $data );
	$filtered = wp_kses($unslashed, $allowedtags);

	return addslashes( $filtered );
}
1049
/**
 * Sanitize content for allowed HTML tags for post content.
 *
 * Post content refers to the page contents of the 'post' type and not $_POST
 * data from forms. Expects slashed data: slashes are stripped before filtering
 * and added back afterwards.
 *
 * @since 2.0.0
 * @uses $allowedposttags
 *
 * @param string $data Post content to filter
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses($data) {
	global $allowedposttags;

	$unslashed = stripslashes( $data );
	$filtered = wp_kses($unslashed, $allowedposttags);

	return addslashes( $filtered );
}
1066
/**
 * Strips all of the HTML in the content.
 *
 * Runs wp_kses() with an empty list of allowed tags on the unslashed data and
 * slashes the result again before returning it.
 *
 * @since 2.1.0
 *
 * @param string $data Content to strip all HTML from
 * @return string Filtered content without any HTML
 */
function wp_filter_nohtml_kses($data) {
	$no_tags_allowed = array();

	$unslashed = stripslashes( $data );
	$filtered  = wp_kses( $unslashed, $no_tags_allowed );

	return addslashes( $filtered );
}
1078
/**
 * Adds all Kses input form content filters.
 *
 * All hooks have default priority. The wp_filter_kses() function is added to
 * the 'pre_comment_content' and 'title_save_pre' hooks.
 *
 * The wp_filter_post_kses() function is added to the 'content_save_pre',
 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks.
 *
 * @since 2.0.0
 * @uses add_filter() See description for what functions are added to what hooks.
 */
function kses_init_filters() {
	// Normal filtering.
	$normal_hooks = array('pre_comment_content', 'title_save_pre');
	foreach ( $normal_hooks as $hook_name )
		add_filter($hook_name, 'wp_filter_kses');

	// Post filtering
	$post_hooks = array('content_save_pre', 'excerpt_save_pre', 'content_filtered_save_pre');
	foreach ( $post_hooks as $hook_name )
		add_filter($hook_name, 'wp_filter_post_kses');
}
1101
/**
 * Removes all Kses input form content filters.
 *
 * A quick procedural way to detach wp_filter_kses() from the
 * 'pre_comment_content' and 'title_save_pre' hooks, and
 * wp_filter_post_kses() from the 'content_save_pre', 'excerpt_save_pre' and
 * 'content_filtered_save_pre' hooks.
 *
 * Does not remove the kses_init() function from 'init' hook (priority is
 * default). Also does not remove kses_init() function from 'set_current_user'
 * hook (priority is also default).
 *
 * @since 2.0.6
 */
function kses_remove_filters() {
	// Normal filtering.
	$normal_hooks = array('pre_comment_content', 'title_save_pre');
	foreach ( $normal_hooks as $hook_name )
		remove_filter($hook_name, 'wp_filter_kses');

	// Post filtering
	$post_hooks = array('content_save_pre', 'excerpt_save_pre', 'content_filtered_save_pre');
	foreach ( $post_hooks as $hook_name )
		remove_filter($hook_name, 'wp_filter_post_kses');
}
1124
/**
 * Sets up most of the Kses filters for input form content.
 *
 * If you remove the kses_init() function from 'init' hook and
 * 'set_current_user' (priority is default), then none of the Kses filter hooks
 * will be added.
 *
 * The Kses filters are always removed first. They are then added back only
 * when the current user lacks the 'unfiltered_html' capability.
 *
 * @uses kses_remove_filters() Removes the Kses filters
 * @uses kses_init_filters() Adds the Kses filters back if the user
 * 		does not have unfiltered HTML capability.
 * @since 2.0.0
 */
function kses_init() {
	// Start from a clean slate so the filters are never registered twice.
	kses_remove_filters();

	if ( ! current_user_can('unfiltered_html') )
		kses_init_filters();
}
1147
// Run kses_init() on the 'init' hook and again on 'set_current_user'; it
// removes the Kses filters and re-adds them based on the current user's
// 'unfiltered_html' capability.
add_action( 'init', 'kses_init' );
add_action( 'set_current_user', 'kses_init' );
1150
/**
 * Filter inline CSS declarations, keeping only whitelisted properties.
 *
 * The value is passed through wp_kses_no_null(), has newlines, carriage
 * returns and tabs removed, and is rejected outright (empty string returned)
 * when it contains a backslash, an opening parenthesis, an ampersand or the
 * start of a CSS comment. The remainder is split on ';' and every declaration
 * whose property name is not in the allowed list is dropped. Items that
 * contain no ':' at all are kept unchanged.
 *
 * The allowed list can be changed through the 'safe_style_css' filter. If the
 * filter returns an empty value, the CSS is returned without per-property
 * filtering.
 *
 * @param string $css        Inline CSS declarations to filter
 * @param string $deprecated Not used
 * @return string Filtered declarations joined with ';', or an empty string
 */
function safecss_filter_attr( $css, $deprecated = '' ) {
	$css = wp_kses_no_null($css);
	$css = str_replace(array("\n","\r","\t"), '', $css);

	// Remove any inline css containing \ ( & or comments. Four backslashes are
	// needed in the single-quoted pattern so the character class holds a
	// literal backslash; with only two, the regex engine sees "\(" and never
	// matches a backslash, letting CSS escape sequences through.
	if ( preg_match( '%[\\\\(&]|/\*%', $css ) )
		return '';

	// explode() instead of split(): the delimiter is a plain string, and
	// split() is no longer available on PHP 7 and later.
	$css_array = explode( ';', trim( $css ) );
	$allowed_attr = apply_filters( 'safe_style_css', array( 'text-align', 'margin', 'color', 'float',
	'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
	'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
	'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
	'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
	'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
	'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
	'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
	'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
	'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
	'width' ) );

	// An emptied whitelist disables per-property filtering.
	if ( empty($allowed_attr) )
		return $css;

	$css = '';
	foreach ( $css_array as $css_item ) {
		// Trim before the emptiness check so whitespace-only items are
		// skipped instead of producing a stray ';' in the output.
		$css_item = trim( $css_item );
		if ( $css_item == '' )
			continue;

		$found = false;
		if ( strpos( $css_item, ':' ) === false ) {
			// No property/value separator: the item is kept as-is.
			$found = true;
		} else {
			$parts = explode( ':', $css_item );
			if ( in_array( trim( $parts[0] ), $allowed_attr ) )
				$found = true;
		}

		if ( $found ) {
			if ( $css != '' )
				$css .= ';';
			$css .= $css_item;
		}
	}

	return $css;
}
Note: See TracBrowser for help on using the repository browser.