[44] | 1 | <?php
|
---|
| 2 | /* Project: MagpieRSS: a simple RSS integration tool
|
---|
| 3 | * File: A compiled file for RSS syndication
|
---|
| 4 | * Author: Kellan Elliot-McCrea <kellan@protest.net>
|
---|
| 5 | * WordPress development team <http://www.wordpress.org/>
|
---|
| 6 | * Charles Johnson <technophilia@radgeek.com>
|
---|
| 7 | * Version: 2009.0618
|
---|
| 8 | * License: GPL
|
---|
| 9 | *
|
---|
| 10 | * Provenance:
|
---|
| 11 | *
|
---|
| 12 | * This is a drop-in replacement for the `rss-functions.php` provided with the
|
---|
| 13 | * WordPress 1.5 distribution, which upgrades the version of MagpieRSS from 0.51
|
---|
| 14 | * to 0.8a. The update improves handling of character encoding, supports
|
---|
| 15 | * multiple categories for posts (using <dc:subject> or <category>), supports
|
---|
| 16 | * Atom 1.0, and implements many other useful features. The file is derived from
|
---|
| 17 | * a combination of (1) the WordPress development team's modifications to
|
---|
| 18 | * MagpieRSS 0.51 and (2) the latest bleeding-edge updates to the "official"
|
---|
| 19 | * MagpieRSS software, including Kellan's original work and some substantial
|
---|
| 20 | * updates by Charles Johnson. All possible through the magic of the GPL. Yay
|
---|
| 21 | * for free software!
|
---|
| 22 | *
|
---|
| 23 | * Differences from the main branch of MagpieRSS:
|
---|
| 24 | *
|
---|
| 25 | * 1. Everything in rss_parse.inc, rss_fetch.inc, rss_cache.inc, and
|
---|
| 26 | * rss_utils.inc is included in one file.
|
---|
| 27 | *
|
---|
| 28 | * 2. MagpieRSS returns the WordPress version as the user agent, rather than
|
---|
| 29 | * Magpie
|
---|
| 30 | *
|
---|
| 31 | * 3. class RSSCache is a modified version by WordPress developers, which
|
---|
| 32 | * caches feeds in the WordPress database (in the options table), rather
|
---|
| 33 | * than writing external files directly.
|
---|
| 34 | *
|
---|
| 35 | * 4. There are two WordPress-specific functions, get_rss() and wp_rss()
|
---|
| 36 | *
|
---|
| 37 | * Differences from the version of MagpieRSS packaged with WordPress:
|
---|
| 38 | *
|
---|
| 39 | * 1. Support for translation between multiple character encodings. Under
|
---|
| 40 | * PHP 5 this is very nicely handled by the XML parsing library. Under PHP
|
---|
| 41 | * 4 we need to do a little bit of work ourselves, using either iconv or
|
---|
| 42 | * mb_convert_encoding if it is not one of the (extremely limited) number
|
---|
| 43 | * of character sets that PHP 4's XML module can handle natively.
|
---|
| 44 | *
|
---|
| 45 | * 2. Numerous bug fixes.
|
---|
| 46 | *
|
---|
| 47 | * 3. The parser class MagpieRSS has been substantially revised to better
|
---|
| 48 | * support popular features such as enclosures and multiple categories,
|
---|
| 49 | * and to support the new Atom 1.0 IETF standard. (Atom feeds are
|
---|
| 50 | * normalized so as to make the data available using terminology from
|
---|
| 51 | * either Atom 0.3 or Atom 1.0. Atom 0.3 backward-compatibility is provided
|
---|
| 52 | * to allow existing software to easily begin accepting Atom 1.0 data; new
|
---|
| 53 | * software SHOULD NOT depend on the 0.3 terminology, but rather use the
|
---|
| 54 | * normalization as a convenient way to keep supporting 0.3 feeds while
|
---|
| 55 | * they linger in the world.)
|
---|
| 56 | *
|
---|
| 57 | * The upgraded MagpieRSS can also now handle some content constructs that
|
---|
| 58 | * had not been handled well by previous versions of Magpie (such as the
|
---|
| 59 | * use of namespaced XHTML in <xhtml:body> or <xhtml:div> elements to
|
---|
| 60 | * provide the full content of posts in RSS 2.0 feeds).
|
---|
| 61 | *
|
---|
| 62 | * Unlike previous versions of MagpieRSS, this version can parse multiple
|
---|
| 63 | * instances of the same child element in item/entry and channel/feed
|
---|
| 64 | * containers. This is done using simple counters next to the element
|
---|
| 65 | * names: the first <category> element on an RSS item, for example, can be
|
---|
| 66 | * found in $item['category'] (thus preserving backward compatibility); the
|
---|
| 67 | * second in $item['category#2'], the third in $item['category#3'], and so
|
---|
| 68 | * on. The number of categories applied to the item can be found in
|
---|
| 69 | * $item['category#']
|
---|
| 70 | *
|
---|
| 71 | * Also unlike previous versions of MagpieRSS, this version allows you to
|
---|
| 72 | * access the values of elements' attributes as well as the content they
|
---|
| 73 | * contain. This can be done using a simple syntax inspired by XPath: to
|
---|
| 74 | * access the type attribute of an RSS 2.0 enclosure, for example, you
|
---|
| 75 | * need only access `$item['enclosure@type']`. A comma-separated list of
|
---|
| 76 | * attributes for the enclosure element is stored in `$item['enclosure@']`.
|
---|
| 77 | * (This syntax interacts easily with the syntax for multiple categories;
|
---|
| 78 | * for example, the value of the `scheme` attribute for the fourth category
|
---|
| 79 | * element on a particular item is stored in `$item['category#4@scheme']`.)
|
---|
| 80 | *
|
---|
| 81 | * Note also that this implementation IS NOT backward-compatible with the
|
---|
| 82 | * kludges that were used to hack in support for multiple categories and
|
---|
| 83 | * for enclosures in upgraded versions of MagpieRSS distributed with
|
---|
| 84 | * previous versions of FeedWordPress. If your hacks or filter plugins
|
---|
| 85 | * depended on the old way of doing things... well, I warned you that they
|
---|
| 86 | * might not be permanent. Sorry!
|
---|
| 87 | */
|
---|
| 88 |
|
---|
// Feed-type identifiers stored in MagpieRSS::$feed_type.
// Each define() is guarded so that a constant predefined elsewhere (for
// example in a site configuration file) keeps its value without PHP raising
// a "constant already defined" notice/warning.
if (!defined('RSS')) {
    define('RSS', 'RSS');
}
if (!defined('ATOM')) {
    define('ATOM', 'Atom');
}

################################################################################
## WordPress: make some settings WordPress-appropriate #########################
################################################################################

// Identify ourselves as WordPress rather than Magpie when fetching feeds.
if (!defined('MAGPIE_USER_AGENT')) {
    define('MAGPIE_USER_AGENT', 'WordPress/' . $wp_version . '(+http://www.wordpress.org)');
}

// Parsed feeds are emitted in the blog's configured character set, falling
// back to ISO-8859-1 when the setting is empty.
$wp_encoding = get_settings('blog_charset');
if (!defined('MAGPIE_OUTPUT_ENCODING')) {
    define('MAGPIE_OUTPUT_ENCODING', ($wp_encoding?$wp_encoding:'ISO-8859-1'));
}
|
---|
| 100 |
|
---|
| 101 | ################################################################################
|
---|
| 102 | ## rss_parse.inc: from MagpieRSS 0.85 ##########################################
|
---|
| 103 | ################################################################################
|
---|
| 104 |
|
---|
| 105 | /**
|
---|
| 106 | * Hybrid parser, and object, takes RSS as a string and returns a simple object.
|
---|
| 107 | *
|
---|
| 108 | * see: rss_fetch.inc for a simpler interface with integrated caching support
|
---|
| 109 | *
|
---|
| 110 | */
|
---|
| 111 | class MagpieRSS {
|
---|
| 112 | var $parser;
|
---|
| 113 |
|
---|
| 114 | var $current_item = array(); // item currently being parsed
|
---|
| 115 | var $items = array(); // collection of parsed items
|
---|
| 116 | var $channel = array(); // hash of channel fields
|
---|
| 117 | var $textinput = array();
|
---|
| 118 | var $image = array();
|
---|
| 119 | var $feed_type;
|
---|
| 120 | var $feed_version;
|
---|
| 121 | var $encoding = ''; // output encoding of parsed rss
|
---|
| 122 |
|
---|
| 123 | var $_source_encoding = ''; // only set if we have to parse xml prolog
|
---|
| 124 |
|
---|
| 125 | var $ERROR = "";
|
---|
| 126 | var $WARNING = "";
|
---|
| 127 |
|
---|
| 128 | // define some constants
|
---|
| 129 | var $_XMLNS_FAMILIAR = array (
|
---|
| 130 | 'http://www.w3.org/2005/Atom' => 'atom' /* 1.0 */,
|
---|
| 131 | 'http://purl.org/atom/ns#' => 'atom' /* pre-1.0 */,
|
---|
| 132 | 'http://purl.org/rss/1.0/' => 'rss' /* 1.0 */,
|
---|
| 133 | 'http://backend.userland.com/RSS2' => 'rss' /* 2.0 */,
|
---|
| 134 | 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
|
---|
| 135 | 'http://www.w3.org/1999/xhtml' => 'xhtml',
|
---|
| 136 | 'http://purl.org/dc/elements/1.1/' => 'dc',
|
---|
| 137 | 'http://purl.org/dc/terms/' => 'dcterms',
|
---|
| 138 | 'http://purl.org/rss/1.0/modules/content/' => 'content',
|
---|
| 139 | 'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
|
---|
| 140 | 'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
|
---|
| 141 | 'http://purl.org/rss/1.0/modules/dc/' => 'dc',
|
---|
| 142 | 'http://wellformedweb.org/CommentAPI/' => 'wfw',
|
---|
| 143 | 'http://webns.net/mvcb/' => 'admin',
|
---|
| 144 | 'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
|
---|
| 145 | 'http://xmlns.com/foaf/0.1/' => 'foaf',
|
---|
| 146 | 'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
|
---|
| 147 | 'http://web.resource.org/cc/' => 'cc',
|
---|
| 148 | 'http://search.yahoo.com/mrss' => 'media',
|
---|
| 149 | );
|
---|
| 150 |
|
---|
| 151 | var $_XMLBASE_RESOLVE = array (
|
---|
| 152 | // Atom 0.3 and 1.0 xml:base support
|
---|
| 153 | 'atom' => array (
|
---|
| 154 | 'link' => array ('href' => true),
|
---|
| 155 | 'content' => array ('src' => true, '*xml' => true, '*html' => true),
|
---|
| 156 | 'summary' => array ('*xml' => true, '*html' => true),
|
---|
| 157 | 'title' => array ('*xml' => true, '*html' => true),
|
---|
| 158 | 'rights' => array ('*xml' => true, '*html' => true),
|
---|
| 159 | 'subtitle' => array ('*xml' => true, '*html' => true),
|
---|
| 160 | 'info' => array('*xml' => true, '*html' => true),
|
---|
| 161 | 'tagline' => array('*xml' => true, '*html' => true),
|
---|
| 162 | 'copyright' => array ('*xml' => true, '*html' => true),
|
---|
| 163 | 'generator' => array ('uri' => true, 'url' => true),
|
---|
| 164 | 'uri' => array ('*content' => true),
|
---|
| 165 | 'url' => array ('*content' => true),
|
---|
| 166 | 'icon' => array ('*content' => true),
|
---|
| 167 | 'logo' => array ('*content' => true),
|
---|
| 168 | ),
|
---|
| 169 |
|
---|
| 170 | // for inline namespaced XHTML
|
---|
| 171 | 'xhtml' => array (
|
---|
| 172 | 'a' => array ('href' => true),
|
---|
| 173 | 'applet' => array('codebase' => true),
|
---|
| 174 | 'area' => array('href' => true),
|
---|
| 175 | 'blockquote' => array('cite' => true),
|
---|
| 176 | 'body' => array('background' => true),
|
---|
| 177 | 'del' => array('cite' => true),
|
---|
| 178 | 'form' => array('action' => true),
|
---|
| 179 | 'frame' => array('longdesc' => true, 'src' => true),
|
---|
| 180 | 'iframe' => array('longdesc' => true, 'iframe' => true, 'src' => true),
|
---|
| 181 | 'head' => array('profile' => true),
|
---|
| 182 | 'img' => array('longdesc' => true, 'src' => true, 'usemap' => true),
|
---|
| 183 | 'input' => array('src' => true, 'usemap' => true),
|
---|
| 184 | 'ins' => array('cite' => true),
|
---|
| 185 | 'link' => array('href' => true),
|
---|
| 186 | 'object' => array('classid' => true, 'codebase' => true, 'data' => true, 'usemap' => true),
|
---|
| 187 | 'q' => array('cite' => true),
|
---|
| 188 | 'script' => array('src' => true),
|
---|
| 189 | ),
|
---|
| 190 | );
|
---|
| 191 |
|
---|
| 192 | var $_ATOM_CONTENT_CONSTRUCTS = array(
|
---|
| 193 | 'content', 'summary', 'title', /* common */
|
---|
| 194 | 'info', 'tagline', 'copyright', /* Atom 0.3 */
|
---|
| 195 | 'rights', 'subtitle', /* Atom 1.0 */
|
---|
| 196 | );
|
---|
| 197 | var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div');
|
---|
| 198 | var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');
|
---|
| 199 |
|
---|
| 200 | // parser variables, useless if you're not a parser, treat as private
|
---|
| 201 | var $stack = array('element' => array (), 'ns' => array (), 'xmlns' => array (), 'xml:base' => array ()); // stack of XML data
|
---|
| 202 |
|
---|
| 203 | var $inchannel = false;
|
---|
| 204 | var $initem = false;
|
---|
| 205 |
|
---|
| 206 | var $incontent = array(); // non-empty if in namespaced XML content field
|
---|
| 207 | var $xml_escape = false; // true when accepting namespaced XML
|
---|
| 208 | var $exclude_top = false; // true when Atom 1.0 type="xhtml"
|
---|
| 209 |
|
---|
| 210 | var $intextinput = false;
|
---|
| 211 | var $inimage = false;
|
---|
| 212 | var $root_namespaces = array();
|
---|
| 213 | var $current_namespace = false;
|
---|
| 214 | var $working_namespace_table = array();
|
---|
| 215 |
|
---|
| 216 | /**
|
---|
| 217 | * Set up XML parser, parse source, and return populated RSS object.
|
---|
| 218 | *
|
---|
| 219 | * @param string $source string containing the RSS to be parsed
|
---|
| 220 | *
|
---|
| 221 | * NOTE: Probably a good idea to leave the encoding options alone unless
|
---|
| 222 | * you know what you're doing as PHP's character set support is
|
---|
| 223 | * a little weird.
|
---|
| 224 | *
|
---|
| 225 | * NOTE: A lot of this is unnecessary but harmless with PHP5
|
---|
| 226 | *
|
---|
| 227 | *
|
---|
| 228 | * @param string $output_encoding output the parsed RSS in this character
|
---|
| 229 | * set defaults to ISO-8859-1 as this is PHP's
|
---|
| 230 | * default.
|
---|
| 231 | *
|
---|
| 232 | * NOTE: might be changed to UTF-8 in future
|
---|
| 233 | * versions.
|
---|
| 234 | *
|
---|
| 235 | * @param string $input_encoding the character set of the incoming RSS source.
|
---|
| 236 | * Leave blank and Magpie will try to figure it
|
---|
| 237 | * out.
|
---|
| 238 | *
|
---|
| 239 | *
|
---|
| 240 | * @param bool $detect_encoding if false Magpie won't attempt to detect
|
---|
| 241 | * source encoding. (caveat emptor)
|
---|
| 242 | *
|
---|
| 243 | */
|
---|
function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                    $input_encoding=null, $detect_encoding=true, $base_uri=null)
{
    // $base_uri (optional) seeds the xml:base stack below, so that relative
    // URIs in the feed can be resolved against it.

    # if PHP xml isn't compiled in, die
    #
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        // Nothing below can work without the extension; stop here in case
        // error() did not halt the script.
        return;
    }

    list($parser, $source) = $this->create_parser($source,
        $output_encoding, $input_encoding, $detect_encoding);

    // PHP < 8 hands back a resource; PHP >= 8 hands back an XMLParser
    // object. Accept either so a valid parser is not reported as a failure.
    if (!is_resource($parser) and !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        // Do not feed an invalid handle to the xml_* functions below.
        return;
    }

    $this->parser = $parser;

    # pass in parser, and a reference to this object
    # setup handlers
    #
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
        'feed_start_element', 'feed_end_element' );

    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    // The bottom of the xml:base stack is the caller-supplied base URI
    // (possibly null); elements push their own resolved base on top of it.
    $this->stack['xml:base'] = array($base_uri);

    $status = xml_parse( $this->parser, $source );

    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            $xml_error = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col = xml_get_current_column_number($this->parser);
            $errormsg = "$xml_error at line $error_line, column $error_col";

            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );

    // Post-process whatever was collected, even after a parse error.
    $this->normalize();
}
|
---|
| 297 |
|
---|
/**
 * XML start-tag handler (registered through xml_set_element_handler()).
 *
 * Maintains the xml:base / xmlns / namespace / element stacks, identifies
 * the feed type from the root element, opens content constructs, and
 * records element attributes under "path@attr" keys.
 *
 * @param mixed  $p          the XML parser handle (unused)
 * @param string $element    tag name as reported by the parser, possibly
 *                           prefixed ("dc:subject")
 * @param array  $attributes attribute name => value; taken by reference and
 *                           modified in place (relative URIs are resolved,
 *                           xml:base / xmlns entries may be added)
 */
function feed_start_element($p, $element, &$attributes) {
    $el = strtolower($element);

    $namespaces = end($this->stack['xmlns']);
    if (!is_array($namespaces)) {
        // end() yields false on an empty stack; start from an empty table
        // instead of relying on false being auto-converted to an array.
        $namespaces = array();
    }
    $baseuri = end($this->stack['xml:base']);

    if (isset($attributes['xml:base'])) {
        $baseuri = Relative_URI::resolve($attributes['xml:base'], $baseuri);
    }
    array_push($this->stack['xml:base'], $baseuri);

    // scan for xml namespace declarations. ugly ugly ugly.
    // theoretically we could use xml_set_start_namespace_decl_handler and
    // xml_set_end_namespace_decl_handler to handle this more elegantly, but
    // support for these is buggy
    foreach ($attributes as $attr => $value) {
        if ( preg_match('/^xmlns(\:([A-Z_a-z].*))?$/', $attr, $match) ) {
            // '' is the key for the default (unprefixed) namespace
            $ns = (isset($match[2]) ? $match[2] : '');
            $namespaces[$ns] = $value;
        }
    }

    array_push($this->stack['xmlns'], $namespaces);

    // check for a namespace, and split if found
    // Don't munge content tags
    $ns = $this->namespace($element);
    if ( empty($this->incontent) ) {
        $el = strtolower($ns['element']);
        $this->current_namespace = $ns['effective'];
        array_push($this->stack['ns'], $ns['effective']);
    }

    // Resolve relative URIs in the attributes listed in _XMLBASE_RESOLVE
    $nsc = $ns['canonical']; $nse = $ns['element'];
    if ( isset($this->_XMLBASE_RESOLVE[$nsc][$nse]) ) {
        if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*xml'])) {
            // element may carry inline markup: make the base URI explicit
            $attributes['xml:base'] = $baseuri;
        }
        foreach ($attributes as $key => $value) {
            if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse][strtolower($key)])) {
                $attributes[$key] = Relative_URI::resolve($attributes[$key], $baseuri);
            }
        }
    }

    $attrs = array_change_key_case($attributes, CASE_LOWER);

    # if feed type isn't set, then this is first element of feed
    # identify feed from root element
    #
    if (!isset($this->feed_type) ) {
        // The version attribute is optional in practice; read it without
        // triggering an undefined-index notice when it is missing.
        $version = (isset($attrs['version']) ? $attrs['version'] : null);

        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->root_namespaces = array('rss', 'rdf');
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            $this->root_namespaces = array('rss');
            $this->feed_version = $version;
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;
            $this->root_namespaces = array('atom');
            if ($ns['uri'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0
                $this->feed_version = '1.0';
            }
            else { // Atom 0.3, probably.
                $this->feed_version = $version;
            }
            $this->inchannel = true;
        }
        // The root element itself is never recorded as data.
        return;
    }

    // if we're inside a namespaced content construct, treat tags as text
    if ( !empty($this->incontent) )
    {
        if ((count($this->incontent) > 1) or !$this->exclude_top) {
            if ($ns['effective']=='xhtml') {
                $tag = $ns['element'];
            }
            else {
                $tag = $element;
                $xmlns = 'xmlns';
                if (strlen($ns['prefix'])>0) {
                    $xmlns = $xmlns . ':' . $ns['prefix'];
                }
                $attributes[$xmlns] = $ns['uri']; // make sure it's visible
            }

            // if tags are inlined, then flatten
            $attrs_str = join(' ',
                array_map(array($this, 'map_attrs'),
                    array_keys($attributes),
                    array_values($attributes) )
            );

            if (strlen($attrs_str) > 0) { $attrs_str = ' '.$attrs_str; }
            $this->append_content( "<{$tag}{$attrs_str}>" );
        }
        array_push($this->incontent, $ns); // stack for parsing content XML
    }

    elseif ( $el == 'channel' ) {
        $this->inchannel = true;
    }

    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }

    // if we're in the default namespace of an RSS feed,
    // record textinput or image fields
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        $this->intextinput = true;
    }

    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }

    // set stack[0] to current element
    else {
        // Atom supports many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.

        $atom_link = false;
        if ( ($ns['canonical']=='atom') and $el == 'link') {
            $atom_link = true;
            if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') {
                $el = $el . "_" . $attrs['rel']; // pseudo-element names for Atom link elements
            }
        }
        # handle atom content constructs
        elseif ( ($ns['canonical']=='atom') and in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            // assume that everything accepts namespaced XML
            // (that will pass through some non-validating feeds;
            // but so what? this isn't a validating parser)
            $this->incontent = array();
            array_push($this->incontent, $ns); // start a stack

            $this->xml_escape = $this->accepts_namespaced_xml($attrs);

            if ( isset($attrs['type']) and trim(strtolower($attrs['type']))=='xhtml') {
                $this->exclude_top = true;
            } else {
                $this->exclude_top = false;
            }
        }
        # Handle inline XHTML body elements --CWJ
        elseif ($ns['effective']=='xhtml' and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS)) {
            $this->current_namespace = 'xhtml';
            $this->incontent = array();
            array_push($this->incontent, $ns); // start a stack

            $this->xml_escape = true;
            $this->exclude_top = false;
        }

        array_unshift($this->stack['element'], $el);
        $elpath = join('_', array_reverse($this->stack['element']));

        // Count repeated siblings: the first keeps the bare name, later
        // ones are stored as name#2, name#3, ...
        $n = $this->element_count($elpath);
        $this->element_count($elpath, $n+1);

        if ($n > 0) {
            array_shift($this->stack['element']);
            array_unshift($this->stack['element'], $el.'#'.($n+1));
            $elpath = join('_', array_reverse($this->stack['element']));
        }

        // this makes the baby Jesus cry, but we can't do it in normalize()
        // because we've made the element name for Atom links unpredictable
        // by tacking on the relation to the end. -CWJ
        if ($atom_link and isset($attrs['href'])) {
            $this->append($elpath, $attrs['href']);
        }

        // add attributes
        if (count($attrs) > 0) {
            // "path@" holds the comma-separated list of attribute names,
            // "path@name" holds each individual value
            $this->append($elpath.'@', join(',', array_keys($attrs)));
            foreach ($attrs as $attr => $value) {
                $this->append($elpath.'@'.$attr, $value);
            }
        }
    }
}
|
---|
| 505 |
|
---|
/**
 * XML character-data handler (registered through
 * xml_set_character_data_handler()).
 *
 * @param mixed  $p    the XML parser handle (unused)
 * @param string $text the chunk of character data delivered by the parser
 */
function feed_cdata ($p, $text) {
    if (!$this->incontent) {
        // Ordinary element: file the text under the current element path.
        $path = join('_', array_reverse($this->stack['element']));
        $this->append($path, $text);
        return;
    }

    // Inside a content construct: the text joins the flattened markup, and
    // is escaped first when the construct is carrying namespaced XML.
    if ($this->xml_escape) {
        $text = htmlspecialchars($text, ENT_COMPAT, $this->encoding);
    }
    $this->append_content( $text );
}
|
---|
| 515 |
|
---|
/**
 * XML end-tag handler (registered through xml_set_element_handler()).
 *
 * Closes content constructs, items, channels, text inputs and images, pops
 * ordinary data elements off the element stack (resolving relative URIs in
 * their text where _XMLBASE_RESOLVE asks for it), and unwinds the
 * namespace / xmlns / xml:base stacks.
 *
 * @param mixed  $p  the XML parser handle (unused)
 * @param string $el tag name as reported by the parser
 */
function feed_end_element ($p, $el) {
    $closer = $this->namespace($el);

    // Local name to look up in _XMLBASE_RESOLVE when this tag closes an
    // ordinary data element; stays null for every other kind of tag.
    $resolve_name = null;

    if ( $this->incontent ) {
        $opener = array_pop($this->incontent);

        // balance tags properly
        // note: i don't think this is actually necessary
        if ($opener != $closer) {
            array_push($this->incontent, $opener);
            $this->append_content("<$el />");
        }
        elseif ( !$this->incontent ) {
            // we're done with the content construct: shift the opening of
            // the content construct off the normal stack
            array_shift( $this->stack['element'] );
        }
        elseif ((count($this->incontent) > 1) or !$this->exclude_top) {
            // still inside the construct: emit the closing tag as text
            $tag = ($closer['effective']=='xhtml' ? $closer['element'] : $el);
            $this->append_content("</$tag>");
        }
    }
    elseif ($closer['effective'] == '') {
        $el = strtolower($closer['element']);
        $is_rss = ($this->feed_type == RSS);

        if ( $el == 'item' or $el == 'entry' ) {
            // item finished: file it and start a fresh one
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
            $this->current_category = 0;
        }
        elseif ( $is_rss and $el == 'textinput' ) {
            $this->intextinput = false;
        }
        elseif ( $is_rss and $el == 'image' ) {
            $this->inimage = false;
        }
        elseif ( $el == 'channel' or $el == 'feed' ) {
            $this->inchannel = false;
        }
        else {
            // looked up as written (not lower-cased) in this branch
            $resolve_name = $closer['element'];
        }
    }
    else {
        $resolve_name = strtolower($closer['element']);
    }

    if ( !is_null($resolve_name) ) {
        if (isset($this->_XMLBASE_RESOLVE[$closer['canonical']][$resolve_name]['*content'])) {
            // Resolve relative URI in content of tag
            $this->dereference_current_element();
        }
        array_shift( $this->stack['element'] );
    }

    // Don't munge the namespace after finishing with elements in
    // namespaced content constructs -CWJ
    if ( !$this->incontent ) {
        $this->current_namespace = array_pop($this->stack['ns']);
    }
    array_pop($this->stack['xmlns']);
    array_pop($this->stack['xml:base']);
}
|
---|
| 580 |
|
---|
| 581 | // Namespace handling functions
|
---|
/**
 * Work out the namespace of a (possibly prefixed) element name, using the
 * namespace declarations currently on top of the xmlns stack.
 *
 * @param string $element tag name, e.g. "title" or "dc:subject"
 * @return array with keys:
 *   'prefix'    => prefix as written in the document ('' if none)
 *   'uri'       => namespace URI bound to that prefix, or null if unbound
 *   'canonical' => familiar short name for the URI from _XMLNS_FAMILIAR,
 *                  else the URI itself, else the prefix when unbound
 *   'effective' => '' when the canonical name is one of the feed's root
 *                  namespaces, otherwise the canonical name
 *   'element'   => the element name without its prefix
 */
function namespace ($element) {
    $namespaces = end($this->stack['xmlns']);
    $ns = '';
    // A colon at position 0 is deliberately not treated as a prefix
    // separator (strpos() returns 0, which is falsy).
    if ( strpos( $element, ':' ) ) {
        // explode() replaces split(), which was removed in PHP 7.0; for a
        // literal ':' delimiter with a limit of 2 the result is the same.
        list($ns, $element) = explode( ':', $element, 2);
    }

    $uri = (isset($namespaces[$ns]) ? $namespaces[$ns] : null);

    if (!is_null($uri)) {
        $canonical = (
            isset($this->_XMLNS_FAMILIAR[$uri])
            ? $this->_XMLNS_FAMILIAR[$uri]
            : $uri
        );
    } else {
        // undeclared prefix: fall back on the prefix as written
        $canonical = $ns;
    }

    if (in_array($canonical, $this->root_namespaces)) {
        $effective = '';
    } else {
        $effective = $canonical;
    }

    return array('effective' => $effective, 'canonical' => $canonical, 'prefix' => $ns, 'uri' => $uri, 'element' => $element);
}
|
---|
| 609 |
|
---|
| 610 | // Utility functions for accessing data structure
|
---|
| 611 |
|
---|
| 612 | // for smart, namespace-aware methods...
|
---|
/**
 * Apply an accessor method to the storage slot for $el in whichever
 * container (item, channel, textinput, image) is currently being parsed,
 * taking the current namespace into account.
 *
 * @param string       $el     element path used as the storage key
 * @param string|array $method name of the method to invoke on the slot
 *                             (it receives the slot by reference plus
 *                             $text); or array('key' => method that
 *                             rewrites $el first, 'value' => method to
 *                             invoke on the slot)
 * @param mixed        $text   second argument handed to the method
 * @return mixed whatever the invoked method returns; NULL when $el is
 *               empty or no container is currently open
 */
function magpie_data ($el, $method, $text = NULL) {
    if (!$el) {
        return NULL;
    }

    if (is_array($method)) {
        // derive the real key (e.g. a counter key) before touching storage
        $el = $this->{$method['key']}($el);
        $method = $method['value'];
    }

    $ns = $this->current_namespace;

    if ( $ns ) {
        // Namespaced data lives one level down, under the namespace name.
        if ( $this->initem ) {
            return $this->{$method} ( $this->current_item[ $ns ][ $el ], $text );
        }
        if ( $this->inchannel ) {
            return $this->{$method} ( $this->channel[ $ns ][ $el ], $text );
        }
        if ( $this->intextinput ) {
            return $this->{$method} ( $this->textinput[ $ns ][ $el ], $text );
        }
        if ( $this->inimage ) {
            return $this->{$method} ( $this->image[ $ns ][ $el ], $text );
        }
        return NULL;
    }

    // Default namespace. Note the precedence differs from the namespaced
    // case: textinput and image are checked before channel here.
    if ( $this->initem ) {
        return $this->{$method} ( $this->current_item[ $el ], $text );
    }
    if ( $this->intextinput ) {
        return $this->{$method} ( $this->textinput[ $el ], $text );
    }
    if ( $this->inimage ) {
        return $this->{$method} ( $this->image[ $el ], $text );
    }
    if ( $this->inchannel ) {
        return $this->{$method} ( $this->channel[ $el ], $text );
    }
    return NULL;
}
|
---|
| 666 |
|
---|
/**
 * Append $str2 to $str1 in place, treating an unset $str1 as "".
 *
 * @param string $str1 target, modified by reference
 * @param string $str2 text to append
 */
function concat (&$str1, $str2="") {
    $existing = (isset($str1) ? $str1 : "");
    $str1 = $existing . $str2;
}
|
---|
| 673 |
|
---|
/**
 * Accessor for magpie_data(): return the current value of a storage slot.
 *
 * @param mixed $el   the slot, received by reference
 * @param mixed $text ignored; present only so every accessor can be called
 *                    the same way
 * @return mixed the slot's value
 */
function retrieve_value (&$el, $text /*ignore*/) {
    $value = $el;
    return $value;
}
|
---|
/**
 * Accessor for magpie_data(): overwrite a storage slot.
 *
 * @param mixed $el   the slot, modified by reference
 * @param mixed $text the new value
 */
function replace_value (&$el, $text) {
    $el = $text;
    return;
}
|
---|
/**
 * Build the key under which the occurrence count of an element is stored:
 * the element path followed by '#'.
 *
 * @param string $el element path
 * @return string counter key
 */
function counter_key ($el) {
    return "{$el}#";
}
|
---|
| 683 |
|
---|
| 684 |
|
---|
/**
 * Append text or flattened markup to the content construct currently being
 * collected, on the current item or (outside any item) on the channel.
 *
 * @param string $text text to add to the construct
 */
function append_content($text) {
    // The bottom of the incontent stack is the element that opened the
    // construct; its effective namespace decides where the data is filed.
    $construct = reset($this->incontent);
    $ns = $construct['effective'];

    // Keeping data about parent elements is necessary to
    // properly handle atom:source and its children elements
    $tag = join('_', array_reverse($this->stack['element']));

    if ( $this->initem ) {
        if (!$ns) {
            $this->concat( $this->current_item[$tag], $text );
            return;
        }
        $this->concat( $this->current_item[$ns][$tag], $text );
        return;
    }

    if ( !$this->inchannel ) {
        return;
    }

    // NOTE(review): unlike the item case, this branch tests
    // $this->current_namespace but still indexes by $ns. The two appear to
    // coincide while a construct is open; confirm before unifying.
    if (!$this->current_namespace) {
        $this->concat( $this->channel[$tag], $text );
        return;
    }
    $this->concat( $this->channel[$ns][$tag], $text );
}
|
---|
| 708 |
|
---|
/**
 * Smart append - field and namespace aware.
 *
 * Delegates to magpie_data() with the 'concat' accessor, so $text is added
 * to the end of whatever is already stored for $el.
 *
 * @param string $el   Element (field) name.
 * @param string $text Text to append.
 */
function append($el, $text) {
    $accessor = 'concat';
    $this->magpie_data($el, $accessor, $text);
}
|
---|
| 713 |
|
---|
/**
 * Replace the value stored for the element currently on the stack with
 * the result of resolving it against the last entry of the xml:base
 * stack.
 *
 * The field name is the element stack joined with '_'; the value is read
 * and written back through magpie_data().
 */
function dereference_current_element () {
    $field = implode('_', array_reverse($this->stack['element']));
    $base_uri = end($this->stack['xml:base']);

    $stored = $this->magpie_data($field, 'retrieve_value');
    $resolved = Relative_URI::resolve($stored, $base_uri);

    $this->magpie_data($field, 'replace_value', $resolved);
}
|
---|
| 720 |
|
---|
/**
 * Smart count - field and namespace aware.
 *
 * Reads, and optionally first overwrites, the counter kept for $el. Both
 * operations go through magpie_data() with 'counter_key' as the key
 * method, paired with 'replace_value' or 'retrieve_value' as the value
 * method.
 *
 * @param string   $el  Element (field) name.
 * @param int|null $set When not null, stored as the new count before reading.
 * @return mixed The stored count, or 0 when nothing truthy is stored.
 */
function element_count ($el, $set = NULL) {
    if ($set !== NULL) {
        $writer = array('key' => 'counter_key', 'value' => 'replace_value');
        $this->magpie_data($el, $writer, $set);
    }

    $reader = array('key' => 'counter_key', 'value' => 'retrieve_value');
    $count = $this->magpie_data($el, $reader);

    if (!$count) {
        return 0;
    }
    return $count;
}
|
---|
| 729 |
|
---|
/**
 * normalize_element() callback: copy the attributes of the $i-th $from
 * enclosure in $source onto the $i-th $to element in $dest.
 *
 * The attribute names are read from the comma-separated list stored under
 * "<id>@". Atom's href (when $from is 'link_enclosure') is stored as
 * "@url", and RSS's url (when $from is 'enclosure') is stored as "@href";
 * in both cases the same value also becomes the element's own value.
 * Every other attribute is copied under its own name.
 *
 * @param array  $source Array to read from (by reference).
 * @param string $from   Source element name.
 * @param array  $dest   Array to write to (by reference).
 * @param string $to     Destination element name.
 * @param int    $i      1-based occurrence number.
 */
function normalize_enclosure (&$source, $from, &$dest, $to, $i) {
    $src_id = $this->element_id($from, $i);
    $dst_id = $this->element_id($to, $i);

    $attr_list_key = "{$src_id}@";
    if (!isset($source[$attr_list_key])) {
        return; // no attributes recorded for this occurrence
    }

    foreach (explode(',', $source[$attr_list_key]) as $attr) {
        $value = $source["{$src_id}@{$attr}"];

        if ($from=='link_enclosure' and $attr=='href') { // from Atom
            $renamed = 'url';
        }
        elseif ($from=='enclosure' and $attr=='url') { // from RSS
            $renamed = 'href';
        }
        else {
            $renamed = null;
        }

        if (is_null($renamed)) {
            $dest["{$dst_id}@{$attr}"] = $value;
        } else {
            $dest["{$dst_id}@{$renamed}"] = $value;
            $dest["{$dst_id}"] = $value;
        }
    }
}
|
---|
| 749 |
|
---|
/**
 * normalize_element() callback for Atom person constructs (author,
 * contributor).
 *
 * 1. Copies the person's link to $dest under the other Atom version's
 *    spelling: 'uri' is read and 'url' written when feed_version >= 1.0,
 *    and the reverse otherwise.
 * 2. Builds a single display string, "name <email>", or just the name or
 *    just the e-mail when only one is present. It is stored both as the
 *    person element's own value in $source and under $to in $dest.
 *
 * @param array  $source Array holding the person fields (by reference).
 * @param string $person Person element name in $source.
 * @param array  $dest   Array receiving the normalized values (by reference).
 * @param string $to     Destination element name.
 * @param int    $i      1-based occurrence number.
 */
function normalize_atom_person (&$source, $person, &$dest, $to, $i) {
    $id = $this->element_id($person, $i);
    $id_to = $this->element_id($to, $i);

    // Atom 0.3 <=> Atom 1.0
    $is_atom_10 = ($this->feed_version >= 1.0);
    $used = ($is_atom_10 ? 'uri' : 'url');
    $norm = ($is_atom_10 ? 'url' : 'uri');

    $used_key = "{$id}_{$used}";
    if (isset($source[$used_key])) {
        $dest["{$id_to}_{$norm}"] = $source[$used_key];
    }

    // Atom to RSS 2.0 and Dublin Core
    // RSS 2.0 person strings should be valid e-mail addresses if possible.
    $email_key = "{$id}_email";
    $name_key = "{$id}_name";
    $has_email = isset($source[$email_key]);
    $has_name = isset($source[$name_key]);

    if (!$has_email and !$has_name) {
        return; // nothing to build a person string from
    }

    if ($has_name) {
        $suffix = ($has_email ? " <" . $source[$email_key] . ">" : '');
        $rss_author = $source[$name_key] . $suffix;
    } else {
        $rss_author = $source[$email_key];
    }

    $source[$id] = $rss_author; // goes to top-level author or contributor
    $dest[$id_to] = $rss_author; // goes to dc:creator or dc:contributor
}
|
---|
| 776 |
|
---|
/**
 * normalize_element() callback: normalize Atom 1.0 and RSS 2.0 categories
 * to each other and to Dublin Core.
 *
 * - The category identifier is mirrored between the element value and its
 *   "@term" attribute. "@term" wins when present; otherwise the element
 *   value is copied into "@term", for RSS feeds only.
 * - The taxonomy URI is mirrored between "@scheme" and "@domain"
 *   ("@scheme" wins when both are present).
 * - The resulting identifier is stored under $to in $dest.
 *
 * @param array  $source Array holding the category (by reference; modified).
 * @param string $from   Category element name in $source.
 * @param array  $dest   Array receiving the subject (by reference).
 * @param string $to     Destination element name.
 * @param int    $i      1-based occurrence number.
 */
function normalize_category (&$source, $from, &$dest, $to, $i) {
    $cat_id = $this->element_id($from, $i);
    $dc_id = $this->element_id($to, $i);

    $term_key = "{$cat_id}@term";
    $scheme_key = "{$cat_id}@scheme";
    $domain_key = "{$cat_id}@domain";

    // first normalize category elements: Atom 1.0 <=> RSS 2.0
    if ( isset($source[$term_key]) ) { // category identifier
        $source[$cat_id] = $source[$term_key];
    }
    elseif ( $this->feed_type == RSS ) {
        $source[$term_key] = $source[$cat_id];
    }

    if ( isset($source[$scheme_key]) ) { // URI to taxonomy
        $source[$domain_key] = $source[$scheme_key];
    }
    elseif ( isset($source[$domain_key]) ) {
        $source[$scheme_key] = $source[$domain_key];
    }

    // Now put the identifier into dc:subject
    $dest[$dc_id] = $source[$cat_id];
}
|
---|
| 798 |
|
---|
/**
 * normalize_element() callback: copy a Dublin Core subject into the
 * category fields. It is the reverse direction of normalize_category().
 *
 * The $i-th $from value in $source is stored in $dest both as the $to
 * element's value (RSS 2.0 style) and as its "@term" attribute (Atom 1.0
 * style).
 *
 * @param array  $source Array holding the subject (by reference).
 * @param string $from   Subject element name in $source.
 * @param array  $dest   Array receiving the category (by reference).
 * @param string $to     Category element name in $dest.
 * @param int    $i      1-based occurrence number.
 */
function normalize_dc_subject (&$source, $from, &$dest, $to, $i) {
    $subject_id = $this->element_id($from, $i);
    $category_id = $this->element_id($to, $i);

    $subject = $source[$subject_id];

    $dest[$category_id] = $subject; // RSS 2.0
    $dest["{$category_id}@term"] = $subject; // Atom 1.0
}
|
---|
| 807 |
|
---|
/**
 * Simplify the logic for normalize(): copy every occurrence of element
 * $from in $source to element $to in $dest, keeping the occurrence count
 * in step.
 *
 * Nothing happens unless $source has either a value for $from or a
 * counter for it ("<from>#"). When a counter exists it is copied to
 * "<to>#" and gives the number of occurrences; otherwise one occurrence
 * is assumed. If you need to mess with things like attributes or change
 * formats, pass the name of a method in $via: it is called as
 * $this->{$via}($source, $from, $dest, $to, $i) for each occurrence
 * instead of the plain copy.
 *
 * @param array       $source Array to read from (by reference).
 * @param string      $from   Source element name.
 * @param array       $dest   Array to write to (by reference).
 * @param string      $to     Destination element name.
 * @param string|null $via    Optional per-occurrence callback method name.
 */
function normalize_element (&$source, $from, &$dest, $to, $via = NULL) {
    $counter = "{$from}#";

    if (!isset($source[$from]) and !isset($source[$counter])) {
        return; // element absent from the source
    }

    $n = 1;
    if (isset($source[$counter])) {
        $n = $source[$counter];
        $dest["{$to}#"] = $source[$counter];
    }

    for ($i = 1; $i <= $n; $i++) {
        if (isset($via)) { // custom callback for ninja attacks
            $this->{$via}($source, $from, $dest, $to, $i);
            continue;
        }

        // just make it the same
        $from_id = $this->element_id($from, $i);
        $to_id = $this->element_id($to, $i);
        $dest[$to_id] = $source[$from_id];
    }
}
|
---|
| 832 |
|
---|
/**
 * Cross-populate equivalent fields so callers can read a feed through
 * either vocabulary.
 *
 * For Atom feeds: Atom 0.3 and 1.0 element names are mirrored, Atom
 * elements are copied to their RSS / Dublin Core counterparts, entries
 * without their own author inherit one from the aggregation source or the
 * feed, and 'date_timestamp' is derived from published / updated.
 *
 * For RSS feeds: RSS elements are copied to their Atom counterparts,
 * categories and dc:subject are mirrored, and 'date_timestamp' is derived
 * from dc:date (when is_rss() compares equal to '1.0') or pubdate.
 *
 * Works in place on $this->channel, $this->image and $this->items; feeds
 * that are neither Atom nor RSS are left untouched.
 */
function normalize () {
    // if atom populate rss fields and normalize 0.3 and 1.0 feeds
    if ( $this->is_atom() ) {
        // Atom 1.0 elements <=> Atom 0.3 elements (Thanks, o brilliant wordsmiths of the Atom 1.0 standard!)
        if ($this->feed_version < 1.0) {
            $this->normalize_element($this->channel, 'tagline', $this->channel, 'subtitle');
            $this->normalize_element($this->channel, 'copyright', $this->channel, 'rights');
            $this->normalize_element($this->channel, 'modified', $this->channel, 'updated');
        } else {
            $this->normalize_element($this->channel, 'subtitle', $this->channel, 'tagline');
            $this->normalize_element($this->channel, 'rights', $this->channel, 'copyright');
            $this->normalize_element($this->channel, 'updated', $this->channel, 'modified');
        }
        $this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person');
        $this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person');

        // Atom elements to RSS elements
        $this->normalize_element($this->channel, 'subtitle', $this->channel, 'description');

        if ( isset($this->channel['logo']) ) {
            $this->normalize_element($this->channel, 'logo', $this->image, 'url');
            $this->normalize_element($this->channel, 'link', $this->image, 'link');
            $this->normalize_element($this->channel, 'title', $this->image, 'title');
        }

        // Items are replaced in place, never added or removed, so the
        // count can be taken once.
        $item_count = count($this->items);
        for ( $i = 0; $i < $item_count; $i++) {
            $item = $this->items[$i];

            // Atom 1.0 elements <=> Atom 0.3 elements
            if ($this->feed_version < 1.0) {
                $this->normalize_element($item, 'modified', $item, 'updated');
                $this->normalize_element($item, 'issued', $item, 'published');
            } else {
                $this->normalize_element($item, 'updated', $item, 'modified');
                $this->normalize_element($item, 'published', $item, 'issued');
            }

            // "If an atom:entry element does not contain
            // atom:author elements, then the atom:author elements
            // of the contained atom:source element are considered
            // to apply. In an Atom Feed Document, the atom:author
            // elements of the containing atom:feed element are
            // considered to apply to the entry if there are no
            // atom:author elements in the locations described
            // above." <http://atompub.org/2005/08/17/draft-ietf-atompub-format-11.html#rfc.section.4.2.1>
            if (!isset($item["author#"])) {
                if (isset($item["source_author#"])) { // from aggregation source
                    $source = $item;
                    $author = "source_author";
                } elseif (isset($this->channel["author#"])) { // from containing feed
                    $source = $this->channel;
                    $author = "author";
                } else {
                    $author = null;
                }

                if (!is_null($author)) {
                    $item["author#"] = $source["{$author}#"];
                    for ($au = 1; $au <= $item["author#"]; $au++) {
                        $id_to = $this->element_id('author', $au);
                        $id_from = $this->element_id($author, $au);

                        // The person element may carry only sub-elements
                        // (name, email, ...) and no value of its own.
                        $item[$id_to] = (isset($source[$id_from]) ? $source[$id_from] : null);
                        foreach (array('name', 'email', 'uri', 'url') as $what) {
                            if (isset($source["{$id_from}_{$what}"])) {
                                $item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"];
                            }
                        }
                    }
                }
            }

            // Atom elements to RSS elements
            $this->normalize_element($item, 'author', $item['dc'], 'creator', 'normalize_atom_person');
            $this->normalize_element($item, 'contributor', $item['dc'], 'contributor', 'normalize_atom_person');
            $this->normalize_element($item, 'summary', $item, 'description');
            $this->normalize_element($item, 'atom_content', $item['content'], 'encoded');
            $this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure');

            // Categories
            if ( isset($item['category#']) ) { // Atom 1.0 categories to dc:subject and RSS 2.0 categories
                $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
            }
            elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories
                $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
            }

            // Normalized item timestamp: prefer published, fall back to
            // updated, and tolerate entries that carry neither.
            if ( isset($item['published']) ) {
                $atom_date = $item['published'];
            } elseif ( isset($item['updated']) ) {
                $atom_date = $item['updated'];
            } else {
                $atom_date = null;
            }
            if ( $atom_date ) {
                $epoch = @parse_w3cdtf($atom_date);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
    elseif ( $this->is_rss() ) {
        // RSS elements to Atom elements
        $this->normalize_element($this->channel, 'description', $this->channel, 'tagline'); // Atom 0.3
        $this->normalize_element($this->channel, 'description', $this->channel, 'subtitle'); // Atom 1.0 (yay wordsmithing!)
        $this->normalize_element($this->image, 'url', $this->channel, 'logo');

        $item_count = count($this->items);
        for ( $i = 0; $i < $item_count; $i++) {
            $item = $this->items[$i];

            // RSS elements to Atom elements
            $this->normalize_element($item, 'description', $item, 'summary');
            $this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure');

            // Categories
            if ( isset($item['category#']) ) { // RSS 2.0 categories to dc:subject and Atom 1.0 categories
                $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
            }
            elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories
                $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
            }

            // Normalized item timestamp
            if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                $epoch = @parse_w3cdtf($item['dc']['date']);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }
            elseif ( isset($item['pubdate']) ) {
                $epoch = @strtotime($item['pubdate']);
                if ($epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
}
|
---|
| 971 |
|
---|
| 972 |
|
---|
/**
 * Tell whether the parsed feed is RSS.
 *
 * @return mixed $this->feed_version when feed_type equals RSS, false
 *               otherwise.
 */
function is_rss () {
    return ( $this->feed_type == RSS ) ? $this->feed_version : false;
}
|
---|
| 981 |
|
---|
/**
 * Tell whether the parsed feed is Atom.
 *
 * @return mixed $this->feed_version when feed_type equals ATOM, false
 *               otherwise.
 */
function is_atom() {
    return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
}
|
---|
| 990 |
|
---|
/**
 * Return an XML parser, and the possibly re-encoded source.
 *
 * On PHP 5 and every later version the parser comes from
 * php5_create_parser() and the source is returned untouched; only older
 * runtimes go through php4_create_parser(), which may re-encode the
 * source. Case folding is always switched off, and when $out_enc is given
 * it is recorded in $this->encoding and set as the parser's target
 * encoding.
 *
 * @param string $source  Raw feed document.
 * @param string $out_enc Desired output encoding, or empty for the default.
 * @param string $in_enc  Declared input encoding, or empty if unknown.
 * @param bool   $detect  Whether to auto-detect the input encoding.
 * @return array array($parser, $source)
 */
function create_parser($source, $out_enc, $in_enc, $detect) {
    // Compare whole versions: testing the first character against 5
    // sent PHP 7 and PHP 8 down the PHP4 fallback path.
    if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }
    if ($out_enc) {
        $this->encoding = $out_enc;
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
    }
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);
    return array($parser, $source);
}
|
---|
| 1009 |
|
---|
/**
 * Instantiate an XML parser under PHP5.
 *
 * PHP5 will do a fine job of detecting input encoding if passed an empty
 * string as the encoding, so the caller's $in_enc is only used when
 * detection is switched off and an encoding was actually supplied.
 *
 * All hail libxml2!
 *
 * @param string $in_enc Declared input encoding, or empty if unknown.
 * @param bool   $detect Whether to auto-detect the input encoding.
 * @return mixed Whatever xml_parser_create() returns.
 */
function php5_create_parser($in_enc, $detect) {
    // by default php5 does a fine job of detecting input encodings
    $encoding = (!$detect && $in_enc) ? $in_enc : '';
    return xml_parser_create($encoding);
}
|
---|
| 1028 |
|
---|
/**
 * Instantiate an XML parser under PHP4.
 *
 * Unfortunately PHP4's support for character encodings
 * and especially XML and character encodings sucks. As
 * long as the documents you parse only contain characters
 * from the ISO-8859-1 character set (a superset of ASCII,
 * and a subset of UTF-8) you're fine. However once you
 * step out of that comfy little world things get mad, bad,
 * and dangerous to know.
 *
 * The following code is based on SJM's work with FoF
 * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
 *
 * Strategy when $detect is on: take $in_enc, or the encoding named in the
 * document's XML declaration, or 'UTF-8'. Use it directly if
 * known_encoding() accepts it; otherwise try converting the source to
 * UTF-8 with iconv, then mbstring. As a last resort a notice is recorded
 * and a default parser is returned with the source unchanged.
 *
 * @param string $source Raw feed document.
 * @param string $in_enc Declared input encoding, or empty if unknown.
 * @param bool   $detect Whether to detect the input encoding.
 * @return array array($parser, $source), where $source may be re-encoded.
 */
function php4_create_parser($source, $in_enc, $detect) {
    if ( !$detect ) {
        return array(xml_parser_create($in_enc), $source);
    }

    if (!$in_enc) {
        // The '?' of '<?xml' must be escaped. Unescaped it only made the
        // '<' optional, so any "xml ... encoding=" text could match.
        if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
            $in_enc = strtoupper($m[1]);
            $this->source_encoding = $in_enc;
        }
        else {
            $in_enc = 'UTF-8';
        }
    }

    if ($this->known_encoding($in_enc)) {
        return array(xml_parser_create($in_enc), $source);
    }

    // the detected encoding is not one of the simple encodings PHP knows

    // attempt to use the iconv extension to
    // cast the XML to a known encoding
    // @see http://php.net/iconv

    if (function_exists('iconv')) {
        $encoded_source = iconv($in_enc,'UTF-8', $source);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // iconv didn't work, try mb_convert_encoding
    // @see http://php.net/mbstring
    if(function_exists('mb_convert_encoding')) {
        $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // else
    $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
        "You may see strange artifacts, and mangled characters.",
        E_USER_NOTICE);

    return array(xml_parser_create(), $source);
}
|
---|
| 1092 |
|
---|
/**
 * Check an encoding name against $this->_KNOWN_ENCODINGS.
 *
 * @param string $enc Encoding name, in any letter case.
 * @return string|false The upper-cased name when it is listed, false
 *                      otherwise.
 */
function known_encoding($enc) {
    $normalized = strtoupper($enc);
    return in_array($normalized, $this->_KNOWN_ENCODINGS) ? $normalized : false;
}
|
---|
| 1102 |
|
---|
/**
 * Report a parser problem and remember it on the object.
 *
 * The message goes to trigger_error() when MAGPIE_DEBUG is truthy and to
 * error_log() otherwise. Notice-level messages (E_USER_NOTICE or
 * E_NOTICE) are kept in $this->WARNING, everything else in $this->ERROR.
 *
 * @param string $errormsg Message text.
 * @param int    $lvl      PHP error level; defaults to E_USER_WARNING.
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    // append PHP's error message if track_errors enabled
    if ( isset($php_errormsg) ) {
        $errormsg .= " ($php_errormsg)";
    }

    if ( !MAGPIE_DEBUG ) {
        error_log( $errormsg, 0);
    }
    else {
        trigger_error( $errormsg, $lvl);
    }

    $is_notice = $lvl & (E_USER_NOTICE|E_NOTICE);
    if ( $is_notice ) {
        $this->WARNING = $errormsg;
        return;
    }
    $this->ERROR = $errormsg;
}
|
---|
| 1122 |
|
---|
/**
 * Magic ID function for multiple elements: the storage key of the
 * $counter-th occurrence of element $el.
 *
 * The first occurrence (any counter not greater than 1) uses the bare
 * name; later ones get '#' and the counter appended, e.g. 'category#2'.
 * Can be called as static MagpieRSS::element_id().
 *
 * @param string $el      Element name.
 * @param int    $counter 1-based occurrence number.
 * @return string
 */
function element_id ($el, $counter) {
    if ($counter > 1) {
        return $el . '#' . $counter;
    }
    return $el . '';
}
|
---|
| 1128 |
|
---|
/**
 * Render one attribute as name="value" markup.
 *
 * The value is escaped with htmlspecialchars() using ENT_COMPAT and the
 * parser's output encoding ($this->encoding); the name is used as given.
 *
 * @param string $k Attribute name.
 * @param string $v Attribute value.
 * @return string
 */
function map_attrs($k, $v) {
    $escaped = htmlspecialchars($v, ENT_COMPAT, $this->encoding);
    return $k . '="' . $escaped . '"';
}
|
---|
| 1132 |
|
---|
/**
 * Decide from a content construct's attributes whether its body should be
 * treated as inline namespaced XML.
 *
 * - Atom before 1.0: mode must be 'xml' (the default when absent) and the
 *   type must end in "/html", "+html", "/xml" or "+xml".
 * - Atom 1.0 and later: type is 'xhtml' or ends in "/xml" or "+xml".
 * - Anything else: false (don't munge unless you're sure).
 *
 * Attribute values are lower-cased and trimmed before comparison.
 *
 * @param array $attrs Attributes of the element; 'mode' and 'type' are read.
 * @return bool
 */
function accepts_namespaced_xml ($attrs) {
    $mode = 'xml';
    if (isset($attrs['mode'])) {
        $mode = trim(strtolower($attrs['mode']));
    }

    $type = null;
    if (isset($attrs['type'])) {
        $type = trim(strtolower($attrs['type']));
    }

    if ($this->feed_type != ATOM) {
        return false; // Don't munge unless you're sure
    }

    if ($this->feed_version < 1.0) {
        return ($mode=='xml' and preg_match(':[/+](html|xml)$:i', $type));
    }

    if ($this->feed_version >= 1.0) {
        return ($type=='xhtml' or preg_match(':[/+]xml$:i', $type));
    }

    return false;
}
|
---|
| 1153 | } // end class RSS
|
---|
| 1154 |
|
---|
| 1155 |
|
---|
| 1156 | // patch to support medieval versions of PHP4.1.x,
|
---|
| 1157 | // courtesy, Ryan Currie, ryan@digibliss.com
|
---|
| 1158 |
|
---|
if (!function_exists('array_change_key_case')) {
    // The constants normally ship with the native function; only define
    // the ones that are actually missing.
    if (!defined('CASE_UPPER')) {
        define("CASE_UPPER",1);
    }
    if (!defined('CASE_LOWER')) {
        define("CASE_LOWER",0);
    }

    /**
     * Fallback for runtimes without a native array_change_key_case().
     *
     * @param array $array Input array.
     * @param int   $case  CASE_LOWER (default) or CASE_UPPER. Any value
     *                     other than CASE_LOWER upper-cases the keys.
     * @return array Copy of $array with every key passed through
     *               strtolower() or strtoupper(); an empty array for
     *               empty input.
     */
    function array_change_key_case($array,$case=CASE_LOWER) {
        $cmd = ($case==CASE_LOWER) ? 'strtolower' : 'strtoupper';

        // Start from an empty array so empty input does not return an
        // undefined variable.
        $output = array();
        foreach($array as $key=>$value) {
            $output[$cmd($key)]=$value;
        }
        return $output;
    }

}
|
---|
| 1174 |
|
---|
| 1175 | ################################################################################
|
---|
| 1176 | ## WordPress: Load in Snoopy from wp-includes ##################################
|
---|
| 1177 | ################################################################################
|
---|
| 1178 |
|
---|
| 1179 | if (!function_exists('wp_remote_request')) :
|
---|
| 1180 | require_once( dirname(__FILE__) . '/class-snoopy.php');
|
---|
| 1181 | endif;
|
---|
| 1182 |
|
---|
| 1183 | ################################################################################
|
---|
| 1184 | ## rss_fetch.inc: from MagpieRSS 0.8a ##########################################
|
---|
| 1185 | ################################################################################
|
---|
| 1186 |
|
---|
| 1187 | /*=======================================================================*\
|
---|
| 1188 | Function: fetch_rss:
|
---|
| 1189 | Purpose: return RSS object for the given url
|
---|
| 1190 | maintain the cache
|
---|
| 1191 | Input: url of RSS file
|
---|
| 1192 | Output: parsed RSS object (see rss_parse.inc)
|
---|
| 1193 |
|
---|
| 1194 | NOTES ON CACHING:
|
---|
| 1195 | If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
|
---|
| 1196 |
|
---|
| 1197 | NOTES ON RETRIEVING REMOTE FILES:
|
---|
| 1198 | If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
|
---|
| 1199 | return a cached object, and touch the cache object upon receiving a
|
---|
| 1200 | 304.
|
---|
| 1201 |
|
---|
| 1202 | NOTES ON FAILED REQUESTS:
|
---|
| 1203 | If there is an HTTP error while fetching an RSS object, the cached
|
---|
| 1204 | version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
|
---|
| 1205 | \*=======================================================================*/
|
---|
| 1206 |
|
---|
// Version stamp of this compiled MagpieRSS build.
define('MAGPIE_VERSION', '2009.0618');

// Most recent error message recorded by error(); read it through magpie_error().
$MAGPIE_ERROR = '';
|
---|
| 1210 |
|
---|
/**
 * Return the parsed feed for $url, going through the cache when
 * MAGPIE_CACHE_ON is set.
 *
 * Cached flow:
 *  1. check the cache;
 *  2. on a fresh HIT, return the cached object (flagged from_cache);
 *  3. otherwise fetch the remote file, sending the cached object's ETag
 *     and Last-Modified as conditional-request validators when the entry
 *     is STALE;
 *  4. a 304 re-stores and returns the cached object; a successful,
 *     parseable response is cached and returned;
 *  5. if the fetch or the parse fails, the stale cached object is returned
 *     when there is one, otherwise the error is recorded and false is
 *     returned.
 *
 * @param string $url URL of the feed.
 * @return mixed Parsed feed object, or false on failure.
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp, $url );
        }
        else {
            error("Failed to fetch $url and cache is off");
            return false;
        }
    }
    // else cache is ON
    else {
        // Flow
        // 1. check cache
        // 2. if there is a hit, make sure its fresh
        // 3. if cached obj fails freshness check, fetch remote
        // 4. if remote fails, return stale object, or error

        $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

        if (MAGPIE_DEBUG and $cache->ERROR) {
            debug($cache->ERROR, E_USER_WARNING);
        }


        $cache_status = 0; // response of check_cache
        $request_headers = array(); // HTTP headers to send with fetch
        $rss = 0; // parsed RSS object
        $errormsg = 0; // errors, if any

        // store parsed XML by desired output encoding
        // as character munging happens at parse time
        $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

        if (!$cache->ERROR) {
            // return cache HIT, MISS, or STALE
            $cache_status = $cache->check_cache( $cache_key);
        }

        // if object cached, and cache is fresh, return cached obj
        // (strict comparison: the initial 0 compares loosely equal to any
        // non-numeric string on PHP versions before 8)
        if ( $cache_status === 'HIT' ) {
            $rss = $cache->get( $cache_key );
            if ( isset($rss) and $rss ) {
                // should be cache age
                $rss->from_cache = 1;
                if ( MAGPIE_DEBUG > 1) {
                    debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
                }
                return $rss;
            }
        }

        // else attempt a conditional get

        // setup headers: each validator is sent on its own merits
        if ( $cache_status === 'STALE' ) {
            $rss = $cache->get( $cache_key );
            if ( $rss ) {
                if ( !empty($rss->etag) ) {
                    $request_headers['If-None-Match'] = $rss->etag;
                }
                if ( !empty($rss->last_modified) ) {
                    // The request header that pairs with Last-Modified is
                    // If-Modified-Since.
                    $request_headers['If-Modified-Since'] = $rss->last_modified;
                }
            }
        }

        $resp = _fetch_remote_file( $url, $request_headers );

        if (isset($resp) and $resp) {
            if ($resp->status == '304' ) {
                // we have the most current copy
                if ( MAGPIE_DEBUG > 1) {
                    debug("Got 304 for $url");
                }
                // reset cache on 304 (at minutillo insistent prodding)
                $cache->set($cache_key, $rss);
                return $rss;
            }
            elseif ( is_success( $resp->status ) ) {
                // Parse into a separate variable so a failed parse does not
                // discard the stale cached object held in $rss.
                $fresh = _response_to_rss( $resp, $url );
                if ( $fresh ) {
                    if (MAGPIE_DEBUG > 1) {
                        debug("Fetch successful");
                    }
                    // add object to cache
                    $cache->set( $cache_key, $fresh );
                    return $fresh;
                }
            }
            else {
                $errormsg = "Failed to fetch $url ";
                if ( $resp->status == '-100' ) {
                    $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
                }
                elseif ( !empty($resp->error) ) {
                    # compensate for Snoopy's annoying habit of tacking
                    # on '\n': strip a trailing newline (or a literal
                    # backslash-n) without eating real characters
                    $http_error = preg_replace('/(?:\\\\n|\s)+$/', '', $resp->error);
                    $errormsg .= "(HTTP Error: $http_error)";
                }
                else {
                    $errormsg .= "(HTTP Response: " . $resp->response_code .')';
                }
            }
        }
        else {
            $errormsg = "Unable to retrieve RSS file for unknown reasons.";
        }

        // else fetch failed
        debug("MagpieRSS fetch failed [$errormsg]");

        // attempt to return cached object
        if ($rss) {
            if ( MAGPIE_DEBUG ) {
                debug("Returning STALE object for $url");
            }
            return $rss;
        }

        // else we totally failed
        error( $errormsg );

        return false;

    } // end if ( !MAGPIE_CACHE_ON ) {
} // end fetch_rss()
|
---|
| 1346 |
|
---|
| 1347 | /*=======================================================================*\
|
---|
| 1348 | Function: error
|
---|
| 1349 | Purpose: set MAGPIE_ERROR, and trigger error
|
---|
| 1350 | \*=======================================================================*/
|
---|
| 1351 |
|
---|
/**
 * Record an error in the global $MAGPIE_ERROR and report it.
 *
 * Empty messages are ignored. Otherwise the message is prefixed with
 * "MagpieRSS: ", stored in $MAGPIE_ERROR, and sent to trigger_error()
 * when MAGPIE_DEBUG is truthy or to error_log() when it is not.
 *
 * @param string $errormsg Message text.
 * @param int    $lvl      PHP error level; defaults to E_USER_WARNING.
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // append PHP's error message if track_errors enabled
    if ( isset($php_errormsg) ) {
        $errormsg .= " ($php_errormsg)";
    }

    if ( !$errormsg ) {
        return; // nothing to report
    }

    $errormsg = "MagpieRSS: $errormsg";
    $MAGPIE_ERROR = $errormsg;

    if ( !MAGPIE_DEBUG ) {
        error_log($errormsg, 0);
        return;
    }
    trigger_error( $errormsg, $lvl);
}
|
---|
| 1369 |
|
---|
/**
 * Emit a debug message through trigger_error(), prefixed with
 * "MagpieRSS [debug] ".
 *
 * @param string $debugmsg Message text.
 * @param int    $lvl      PHP error level; defaults to E_USER_NOTICE.
 */
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    $message = "MagpieRSS [debug] $debugmsg";
    trigger_error($message, $lvl);
}
|
---|
| 1373 |
|
---|
/*=======================================================================*\
    Function:   magpie_error
    Purpose:    accessor for the magpie error variable
    Input:      optional error message; when non-empty it replaces the
                stored message
    Output:     the message currently stored in $MAGPIE_ERROR
\*=======================================================================*/
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
|
---|
| 1387 |
|
---|
/*=======================================================================*\
    Function:   _fetch_remote_file
    Purpose:    retrieve an arbitrary remote file
    Input:      url of the remote file
                headers to send along with the request (optional)
    Output:     an HTTP response object (see Snoopy.class.inc). When
                wp_remote_request() is available the object is a stdClass
                that always carries status, response_code, headers,
                results and error; a transport failure is reported as
                status 500 with the WordPress error messages in error.
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = "" ) {
    // Ensure that we have constants set up, since they are used below.
    init();

    // WordPress 2.7 has deprecated Snoopy. It's still there, for now, but
    // I'd rather not rely on it.
    if (!function_exists('wp_remote_request')) {
        // Snoopy is an HTTP client in PHP
        $client = new Snoopy();
        $client->agent = MAGPIE_USER_AGENT;
        $client->read_timeout = MAGPIE_FETCH_TIME_OUT;
        $client->use_gzip = MAGPIE_USE_GZIP;
        if (is_array($headers) ) {
            $client->rawheaders = $headers;
        }
        @$client->fetch($url);
        return $client;
    }

    $resp = wp_remote_request($url, array(
        'headers' => $headers,
        'timeout' => MAGPIE_FETCH_TIME_OUT
    ));

    // Give every response the same set of properties, so callers can read
    // ->error after an HTTP failure, or ->headers / ->results after a
    // transport failure, without hitting an undefined property.
    $client = new stdClass;
    $client->headers = array();
    $client->results = '';
    $client->error = '';

    if ( is_wp_error($resp) ) {
        $error = $resp->get_error_messages();

        $client->status = 500;
        $client->response_code = 500;
        $client->error = implode(" / ", $error). "\n"; //\n = Snoopy compatibility
    }
    else {
        $client->status = $resp['response']['code'];
        $client->response_code = $resp['response']['code'];
        $client->headers = $resp['headers'];
        $client->results = $resp['body'];
    }

    return $client;
}
|
---|
| 1434 |
|
---|
/*=======================================================================*\
	Function:	_response_to_rss
	Purpose:	parse an HTTP response object into an RSS object
	Input:		an HTTP response object (see Snoopy / _fetch_remote_file)
				url the response was fetched from (optional)
	Output:		parsed RSS object (see rss_parse), or false when the
				document could not be parsed
\*=======================================================================*/
function _response_to_rss ($resp, $url = null) {
	$rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING, $url );

	// Parsing failed: construct an error message and bail out.
	if ( !$rss or $rss->ERROR ) {
		$errormsg = "Failed to parse RSS file.";

		if ($rss) {
			$errormsg .= " (" . $rss->ERROR . ")";
		}
		error($errormsg);

		return false;
	}

	$rss->http_status = $resp->status;

	// The header collection may be missing (error stubs) or may not be
	// iterable; treat anything unusable as "no headers".
	$headers = isset($resp->headers) ? $resp->headers : array();
	if ( !is_array($headers) and !is_object($headers) ) {
		$headers = array();
	}

	// find Etag, and Last-Modified
	foreach ($headers as $key => $h) {
		if ( is_string($key) ) {
			// name => value map; this appears to be the shape produced by
			// the wp_remote_request() branch of _fetch_remote_file.
			$field = $key;
			$val = is_array($h) ? implode(', ', $h) : (string) $h;
		}
		elseif ( !is_string($h) ) {
			// Not a raw header line; nothing we can parse.
			continue;
		}
		elseif ( strpos($h, ": ") !== false ) {
			// Snoopy-style raw header line: "Field: value"
			// 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
			list($field, $val) = explode(": ", $h, 2);
		}
		else {
			$field = $h;
			$val = "";
		}

		$rss->header[$field] = $val;

		// HTTP header names are case-insensitive.
		if ( strcasecmp($field, 'ETag') == 0 ) {
			$rss->etag = $val;
		}

		if ( strcasecmp($field, 'Last-Modified') == 0 ) {
			$rss->last_modified = $val;
		}
	}

	return $rss;
}
|
---|
| 1483 |
|
---|
/*=======================================================================*\
	Function:	init
	Purpose:	setup constants with default values
				check for user overrides (a constant that is already
				defined is left alone)
\*=======================================================================*/
function init () {
	// Only run once; the marker constant records that we have been here.
	if ( defined('MAGPIE_INITALIZED') ) {
		return;
	}
	define('MAGPIE_INITALIZED', true);

	// Defaults that must exist before the user agent string is built.
	$defaults = array(
		'MAGPIE_CACHE_ON'         => true,
		'MAGPIE_CACHE_DIR'        => './cache',
		'MAGPIE_CACHE_AGE'        => 60*60,	// one hour
		'MAGPIE_CACHE_FRESH_ONLY' => false,
		'MAGPIE_OUTPUT_ENCODING'  => 'ISO-8859-1',
		'MAGPIE_INPUT_ENCODING'   => null,
		'MAGPIE_DETECT_ENCODING'  => true,
		'MAGPIE_DEBUG'            => 0,
	);
	foreach ($defaults as $name => $value) {
		if ( !defined($name) ) {
			define($name, $value);
		}
	}

	// The user agent advertises whether caching is switched on.
	if ( !defined('MAGPIE_USER_AGENT') ) {
		$suffix = MAGPIE_CACHE_ON ? ')' : '; No cache)';
		define('MAGPIE_USER_AGENT', 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $suffix);
	}

	// Fetch settings: 5 second timeout; use gzip encoding to fetch rss
	// files if supported.
	$fetch_defaults = array(
		'MAGPIE_FETCH_TIME_OUT' => 5,
		'MAGPIE_USE_GZIP'       => true,
	);
	foreach ($fetch_defaults as $name => $value) {
		if ( !defined($name) ) {
			define($name, $value);
		}
	}
}
|
---|
| 1551 |
|
---|
| 1552 | // NOTE: the following code should really be in Snoopy, or at least
|
---|
| 1553 | // somewhere other than rss_fetch!
|
---|
| 1554 |
|
---|
| 1555 | /*=======================================================================*\
|
---|
| 1556 | HTTP STATUS CODE PREDICATES
|
---|
| 1557 | These functions attempt to classify an HTTP status code
|
---|
| 1558 | based on RFC 2616 and RFC 2518.
|
---|
| 1559 |
|
---|
| 1560 | All of them take an HTTP status code as input, and return true or false
|
---|
| 1561 |
|
---|
| 1562 | All this code is adapted from LWP's HTTP::Status.
|
---|
| 1563 | \*=======================================================================*/
|
---|
| 1564 |
|
---|
| 1565 |
|
---|
/*=======================================================================*\
	Function:	is_info
	Purpose:	return true if Informational status code (100 up to,
				but not including, 200)
\*=======================================================================*/
function is_info ($sc) {
	if ( !($sc >= 100) ) {
		return false;
	}
	return $sc < 200;
}
|
---|
| 1573 |
|
---|
/*=======================================================================*\
	Function:	is_success
	Purpose:	return true if Successful status code (200 up to,
				but not including, 300)
\*=======================================================================*/
function is_success ($sc) {
	if ( !($sc >= 200) ) {
		return false;
	}
	return $sc < 300;
}
|
---|
| 1581 |
|
---|
/*=======================================================================*\
	Function:	is_redirect
	Purpose:	return true if Redirection status code (300 up to,
				but not including, 400)
\*=======================================================================*/
function is_redirect ($sc) {
	if ( !($sc >= 300) ) {
		return false;
	}
	return $sc < 400;
}
|
---|
| 1589 |
|
---|
/*=======================================================================*\
	Function:	is_error
	Purpose:	return true if Error status code, client or server
				(400 up to, but not including, 600)
\*=======================================================================*/
function is_error ($sc) {
	if ( !($sc >= 400) ) {
		return false;
	}
	return $sc < 600;
}
|
---|
| 1597 |
|
---|
/*=======================================================================*\
	Function:	is_client_error
	Purpose:	return true if Error status code, and it's a client error
				(400 up to, but not including, 500)
\*=======================================================================*/
function is_client_error ($sc) {
	if ( !($sc >= 400) ) {
		return false;
	}
	return $sc < 500;
}
|
---|
| 1605 |
|
---|
/*=======================================================================*\
	Function:	is_server_error
	Purpose:	return true if Error status code, and it's a server error
				(500 up to, but not including, 600)
\*=======================================================================*/
function is_server_error ($sc) {
	if ( !($sc >= 500) ) {
		return false;
	}
	return $sc < 600;
}
|
---|
| 1613 |
|
---|
| 1614 | ################################################################################
|
---|
| 1615 | ## rss_cache.inc: from WordPress 1.5 ###########################################
|
---|
| 1616 | ################################################################################
|
---|
| 1617 |
|
---|
// Feed cache backed by WordPress options: each feed is stored under the
// option 'rss_<md5 of url>' and its store time under 'rss_<md5 of url>_ts'.
class RSSCache {
	var $BASE_CACHE = 'wp-content/cache';	// cache location setting (not read by any method of this class)
	var $MAX_AGE	= 43200;		// when are entries stale, default twelve hours
	var $ERROR 		= '';			// most recent error message

	/*=======================================================================*\
		Function:	__construct
		Purpose:	set up the cache, optionally overriding the defaults
		Input:		base cache location (optional)
					maximum age of an entry in seconds (optional)
	\*=======================================================================*/
	function __construct ($base='', $age='') {
		if ( $base ) {
			$this->BASE_CACHE = $base;
		}
		if ( $age ) {
			$this->MAX_AGE = $age;
		}
	}

	// Old-style constructor name, kept so explicit calls to it keep working.
	// PHP 8 no longer treats a method named after its class as the
	// constructor, which is why __construct() above does the real work.
	function RSSCache ($base='', $age='') {
		$this->__construct($base, $age);
	}

	/*=======================================================================*\
		Function:	set
		Purpose:	add an item to the cache, keyed on url
		Input:		url from which the rss file was fetched
					the object to store
		Output:		name of the option the object was stored under
	\*=======================================================================*/
	function set ($url, $rss) {
		global $wpdb;
		$cache_option	= 'rss_' . $this->file_name( $url );
		$cache_timestamp = $cache_option . '_ts';

		// Create the options with autoload set to 'no' when they do not exist
		// yet. Both names are built from a fixed prefix/suffix around an md5
		// hex digest, so interpolating them into the query is safe.
		if ( !$wpdb->get_var("SELECT option_name FROM $wpdb->options WHERE option_name = '$cache_option'") )
			add_option($cache_option, '', '', 'no');
		if ( !$wpdb->get_var("SELECT option_name FROM $wpdb->options WHERE option_name = '$cache_timestamp'") )
			add_option($cache_timestamp, '', '', 'no');

		update_option($cache_option, $rss);
		update_option($cache_timestamp, time() );

		return $cache_option;
	}

	/*=======================================================================*\
		Function:	get
		Purpose:	fetch an item from the cache
		Input:		url from which the rss file was fetched
		Output:		cached object on HIT, 0 on MISS
	\*=======================================================================*/
	function get ($url) {
		$this->ERROR = "";
		$cache_option = 'rss_' . $this->file_name( $url );

		$rss = get_option( $cache_option );
		if ( ! $rss ) {
			$this->debug(
				"Cache doesn't contain: $url (cache option: $cache_option)"
			);
			return 0;
		}

		// failsafe; seems to break at odd points in WP MU
		if (is_string($rss)) {
			$rss = $this->unserialize($rss);
		}

		return $rss;
	}

	/*=======================================================================*\
		Function:	check_cache
		Purpose:	check a url for membership in the cache
					and whether the object is older than MAX_AGE (ie. STALE)
		Input:		url from which the rss file was fetched
		Output:		'HIT' when cached and current, 'STALE' when cached but
					too old, 'MISS' when there is no timestamp for the url
	\*=======================================================================*/
	function check_cache ( $url ) {
		$this->ERROR = "";
		$cache_timestamp = 'rss_' . $this->file_name( $url ) . '_ts';

		$mtime = get_option($cache_timestamp);
		if ( !$mtime ) {
			// object does not exist
			return 'MISS';
		}

		// find how long ago the entry was added to the cache
		// and whether that is longer than MAX_AGE
		$age = time() - $mtime;
		if ( $this->MAX_AGE > $age ) {
			// object exists and is current
			return 'HIT';
		}

		// object exists but is old
		return 'STALE';
	}

	/*=======================================================================*\
		Function:	serialize
		Purpose:	thin wrapper around PHP's serialize()
	\*=======================================================================*/
	function serialize ( $rss ) {
		return serialize( $rss );
	}

	/*=======================================================================*\
		Function:	unserialize
		Purpose:	thin wrapper around PHP's unserialize()
	\*=======================================================================*/
	function unserialize ( $data ) {
		// NOTE(review): unserialize() is only safe on trusted data; get()
		// feeds it values read back through get_option().
		return unserialize( $data );
	}

	/*=======================================================================*\
		Function:	file_name
		Purpose:	map url to its key in the cache
		Input:		url from which the rss file was fetched
		Output:		md5 hex digest of the url
	\*=======================================================================*/
	function file_name ($url) {
		return md5( $url );
	}

	/*=======================================================================*\
		Function:	error
		Purpose:	register error: remember it in $this->ERROR, then raise
					it (when MAGPIE_DEBUG is on) or write it to the error log
	\*=======================================================================*/
	function error ($errormsg, $lvl=E_USER_WARNING) {
		// append PHP's error message if track_errors enabled
		if ( isset($php_errormsg) ) {
			$errormsg .= " ($php_errormsg)";
		}
		$this->ERROR = $errormsg;
		if ( MAGPIE_DEBUG ) {
			trigger_error( $errormsg, $lvl);
		}
		else {
			error_log( $errormsg, 0);
		}
	}

	/*=======================================================================*\
		Function:	debug
		Purpose:	report a debugging message, only when MAGPIE_DEBUG is on
	\*=======================================================================*/
	function debug ($debugmsg, $lvl=E_USER_NOTICE) {
		if ( MAGPIE_DEBUG ) {
			$this->error("MagpieRSS [debug] $debugmsg", $lvl);
		}
	}
}
|
---|
| 1760 |
|
---|
| 1761 | ################################################################################
|
---|
| 1762 | ## rss_utils.inc: from MagpieRSS 0.8a ##########################################
|
---|
| 1763 | ################################################################################
|
---|
| 1764 |
|
---|
/*======================================================================*\
	Function:	parse_w3cdtf
	Purpose:	parse a W3CDTF date into unix epoch
	Input:		a date string such as "2009-06-18T12:30:45+02:00"
	Output:		seconds since the unix epoch, or -1 when the string does
				not match the W3CDTF pattern

	NOTE: http://www.w3.org/TR/NOTE-datetime
\*======================================================================*/

function parse_w3cdtf ( $date_str ) {

	# regex to match w3cdtf
	# groups: 1 year, 3 month, 5 day, 7 hours, 8 minutes, 10 seconds,
	# 12 offset sign, 13 offset hours, 14 offset minutes, 15 "Z"
	$pat = '/^\s*(\d{4})(-(\d{2})(-(\d{2})(T(\d{2}):(\d{2})(:(\d{2})(\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?)?)?)?\s*$/';

	if ( !preg_match( $pat, (string) $date_str, $match ) ) {
		return -1;
	}

	# preg_match() drops trailing groups that did not take part in the match
	# and reports earlier ones as ''. Pad the array so every index exists and
	# "missing" is uniformly the empty string.
	$match = array_pad($match, 16, '');

	$year = (int) $match[1];

	# W3C dates can omit the time, the day of the month, or even the month.
	# Fill in any blanks using information from the present moment. --CWJ
	$month = ($match[3] !== '') ? (int) $match[3] : (int) gmdate('m');
	$day   = ($match[5] !== '') ? (int) $match[5] : (int) gmdate('d');

	if ( $match[7] !== '' ) {
		$hours   = (int) $match[7];
		$minutes = (int) $match[8];
		$seconds = (int) $match[10];	# '' (no seconds given) becomes 0
	}
	else {
		$hours   = (int) gmdate('H');
		$minutes = 0;
		$seconds = 0;
	}

	# calc epoch for the given date assuming GMT
	$epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year);

	# zulu time, aka GMT, and dates without any zone need no correction
	$offset = 0;
	if ( $match[15] !== 'Z' and $match[12] !== '' ) {
		$offset = (((int) $match[13] * 60) + (int) $match[14]) * 60;

		# is timezone ahead of GMT? then subtract offset
		if ( $match[12] == '+' ) {
			$offset = $offset * -1;
		}
	}

	return $epoch + $offset;
}
|
---|
| 1823 |
|
---|
| 1824 | # Relative URI static class: PHP class for resolving relative URLs
|
---|
| 1825 | #
|
---|
| 1826 | # This class is derived (under the terms of the GPL) from URL Class 0.3 by
|
---|
| 1827 | # Keyvan Minoukadeh <keyvan@k1m.com>, which is great but more than we need
|
---|
| 1828 | # for MagpieRSS's purposes. The class has been stripped down to a single
|
---|
| 1829 | # public method: Relative_URI::resolve($url, $base), which resolves the URI in
|
---|
| 1830 | # $url relative to the URI in $base
|
---|
| 1831 | #
|
---|
| 1832 | # FeedWordPress also uses this class. So if we have it loaded in, don't load it
|
---|
| 1833 | # again.
|
---|
| 1834 | #
|
---|
| 1835 | # -- Charles Johnson <technophilia@radgeek.com>
|
---|
if (!class_exists('Relative_URI')) {
	class Relative_URI
	{
		// Resolve relative URI in $url against the base URI in $base. If $base
		// is not supplied, then we use the REQUEST_URI of this script.
		//
		// I'm hoping this method reflects RFC 2396 Section 5.2
		//
		// Returns the resolved URI as a string. Both inputs are trimmed and
		// passed through _encode() first, so the result is percent-encoded.
		// The methods of this class are declared static because they are
		// always called as Relative_URI::method().
		static function resolve ($url, $base = NULL)
		{
			if (is_null($base)):
				$base = 'http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI'];
			endif;

			$base = Relative_URI::_encode(trim($base));
			$uri_parts = Relative_URI::_parse_url($base);

			$url = Relative_URI::_encode(trim($url));
			$parts = Relative_URI::_parse_url($url);

			// Query and fragment always come from the reference, never the base.
			$uri_parts['fragment'] = (isset($parts['fragment']) ? $parts['fragment'] : null);
			$uri_parts['query'] = (isset($parts['query']) ? $parts['query'] : null);

			// if path is empty, and scheme, host, and query are undefined,
			// the URL is referring the base URL

			if (($parts['path'] == '') && !isset($parts['scheme']) && !isset($parts['host']) && !isset($parts['query'])) {
				// If the URI is empty, return the base URI untouched.
				if (!isset($parts['fragment'])) {
					return $base;
				}
				// Only a fragment: it replaces any fragment the base carries.
				$hash_pos = strpos($base, '#');
				$stem = ($hash_pos === false) ? $base : substr($base, 0, $hash_pos);
				return $stem . '#' . $parts['fragment'];
			} elseif (isset($parts['scheme'])) {
				// If the scheme is set, then the URI is absolute.
				return $url;
			} elseif (isset($parts['host'])) {
				// Network-path reference ("//host/path"): the whole authority
				// comes from the reference, so drop the base's user info and port.
				unset($uri_parts['user'], $uri_parts['pass'], $uri_parts['port']);
				foreach (array('user', 'pass', 'host', 'port') as $component) {
					if (isset($parts[$component])) {
						$uri_parts[$component] = $parts[$component];
					}
				}
				$uri_parts['path'] = $parts['path'];
			} else {
				// We have a relative path but not a host.

				// start ugly fix:
				// prepend slash to path if base host is set, base path is not set, and url path is not absolute
				if (!empty($uri_parts['host']) && ($uri_parts['path'] == '')
				&& (strlen($parts['path']) > 0)
				&& (substr($parts['path'], 0, 1) != '/')) {
					$parts['path'] = '/'.$parts['path'];
				} // end ugly fix

				if (substr($parts['path'], 0, 1) == '/') {
					$uri_parts['path'] = $parts['path'];
				} else {
					// copy base path excluding any characters after the last (right-most) slash character
					$buffer = substr($uri_parts['path'], 0, (int)strrpos($uri_parts['path'], '/')+1);
					// append relative path
					$buffer .= $parts['path'];
					// remove "./" where "." is a complete path segment.
					$buffer = str_replace('/./', '/', $buffer);
					if (substr($buffer, 0, 2) == './') {
						$buffer = substr($buffer, 2);
					}
					// if buffer ends with "." as a complete path segment, remove it
					if (substr($buffer, -2) == '/.') {
						$buffer = substr($buffer, 0, -1);
					}
					// remove "<segment>/../" where <segment> is a complete path segment not equal to ".."
					$search_finished = false;
					$segment = explode('/', $buffer);
					while (!$search_finished) {
						for ($x=0; $x+1 < count($segment);) {
							if (($segment[$x] != '') && ($segment[$x] != '..') && ($segment[$x+1] == '..')) {
								// keep a trailing slash when the pair being removed ends the path
								if ($x+2 == count($segment)) $segment[] = '';
								unset($segment[$x], $segment[$x+1]);
								$segment = array_values($segment);
								// restart the scan from the first segment
								continue 2;
							} else {
								$x++;
							}
						}
						$search_finished = true;
					}
					$buffer = (count($segment) == 1) ? '/' : implode('/', $segment);
					$uri_parts['path'] = $buffer;

				}
			}

			// If we've gotten to this point, we can try to put the pieces
			// back together: scheme ":" "//" [user[:pass] "@"] host [":" port]
			// path ["?" query] ["#" fragment]
			$ret = '';
			if (isset($uri_parts['scheme'])) $ret .= $uri_parts['scheme'].':';
			if (isset($uri_parts['host'])) {
				$ret .= '//';
				if (isset($uri_parts['user'])) {
					$ret .= $uri_parts['user'];
					if (isset($uri_parts['pass'])) $ret .= ':'.$uri_parts['pass'];
					$ret .= '@';
				}
				$ret .= $uri_parts['host'];
				if (isset($uri_parts['port'])) $ret .= ':'.$uri_parts['port'];
			}
			$ret .= $uri_parts['path'];
			if (isset($uri_parts['query'])) $ret .= '?'.$uri_parts['query'];
			if (isset($uri_parts['fragment'])) $ret .= '#'.$uri_parts['fragment'];

			return $ret;
		}

		/**
		* Parse URL
		*
		* Regular expression grabbed from RFC 2396 Appendix B.
		* This is a replacement for PHPs builtin parse_url().
		* @param string $url
		* @access private
		* @return array always has 'path'; 'scheme', 'user', 'pass', 'host',
		*               'port', 'query' and 'fragment' only when present
		*/
		static function _parse_url($url)
		{
			// I'm using this pattern instead of parse_url() as there's a few strings where parse_url()
			// generates a warning.
			if (preg_match('!^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?!', $url, $match)) {
				$parts = array();
				if ($match[1] != '') $parts['scheme'] = $match[2];
				if ($match[3] != '') $parts['auth'] = $match[4];
				// parse auth
				if (isset($parts['auth'])) {
					// store user info
					if (($at_pos = strpos($parts['auth'], '@')) !== false) {
						$userinfo = explode(':', substr($parts['auth'], 0, $at_pos), 2);
						$parts['user'] = $userinfo[0];
						if (isset($userinfo[1])) $parts['pass'] = $userinfo[1];
						$parts['auth'] = substr($parts['auth'], $at_pos+1);
					}
					// get port number
					if ($port_pos = strrpos($parts['auth'], ':')) {
						$parts['host'] = substr($parts['auth'], 0, $port_pos);
						$parts['port'] = (int)substr($parts['auth'], $port_pos+1);
						// a port below 1 is treated as "no port"
						if ($parts['port'] < 1) $parts['port'] = null;
					} else {
						$parts['host'] = $parts['auth'];
					}
				}
				unset($parts['auth']);
				$parts['path'] = $match[5];
				if (isset($match[6]) && ($match[6] != '')) $parts['query'] = $match[7];
				if (isset($match[8]) && ($match[8] != '')) $parts['fragment'] = $match[9];
				return $parts;
			}
			// shouldn't reach here
			return array('path'=>'');
		}

		// Percent-encode control characters and a handful of other ASCII
		// characters, then drop every byte that is still outside the range
		// 0x21 - 0x7E.
		static function _encode($string)
		{
			// translation table, built on first use and reused afterwards
			static $replace = array();
			if (!count($replace)) {
				$find = array(32, 34, 60, 62, 123, 124, 125, 91, 92, 93, 94, 96, 127);
				$find = array_merge(range(0, 31), $find);
				$find = array_map('chr', $find);
				foreach ($find as $char) {
					$replace[$char] = '%'.bin2hex($char);
				}
			}
			// escape control characters and a few other characters
			$encoded = strtr($string, $replace);
			// remove any character outside the hex range: 21 - 7E (see www.asciitable.com)
			return preg_replace('/[^\x21-\x7e]/', '', $encoded);
		}
	} // class Relative_URI
}
|
---|
| 2002 |
|
---|
| 2003 | ################################################################################
|
---|
| 2004 | ## WordPress: wp_rss(), get_rss() ##############################################
|
---|
| 2005 | ################################################################################
|
---|
| 2006 |
|
---|
// Fetch the feed at $url and print its first $num items as an HTML list of
// links, or print an error message when the feed could not be fetched.
function wp_rss ($url, $num) {
	//ini_set("display_errors", false); uncomment to suppress php errors thrown if the feed is not returned.
	$rss = fetch_rss($url);
	if ( !$rss ) {
		echo "an error has occured the feed is probably down, try again later.";
		return;
	}

	// Feed content is untrusted, so every value is escaped before it is
	// placed in the markup. Item text is presumably in the configured
	// output encoding; fall back to that setting's default otherwise.
	$charset = defined('MAGPIE_OUTPUT_ENCODING') ? MAGPIE_OUTPUT_ENCODING : 'ISO-8859-1';

	echo "<ul>";
	$rss->items = array_slice($rss->items, 0, $num);
	foreach ($rss->items as $item ) {
		// Any of these fields may be absent from a feed item.
		$link = isset($item['link']) ? $item['link'] : '';
		$description = isset($item['description']) ? $item['description'] : '';
		$title = isset($item['title']) ? $item['title'] : '';

		// NOTE(review): escaping keeps the value inside the attribute, but the
		// scheme of the link itself is not validated here.
		echo "<li>\n";
		echo "<a href='" . htmlspecialchars($link, ENT_QUOTES, $charset)
			. "' title='" . htmlspecialchars($description, ENT_QUOTES, $charset) . "'>";
		echo htmlentities($title, ENT_QUOTES, $charset);
		echo "</a><br />\n";
		echo "</li>\n";
	}
	echo "</ul>";
}
|
---|
| 2027 |
|
---|
// Like get posts, but for RSS: fetch the feed at $uri, print its first $num
// items as <li> links (no surrounding <ul>), and return those items.
// Returns false when the feed could not be fetched.
function get_rss ($uri, $num = 5) {
	$rss = fetch_rss($uri);
	if ( !$rss ) {
		return false;
	}

	// Feed content is untrusted, so every value is escaped before it is
	// placed in the markup. Item text is presumably in the configured
	// output encoding; fall back to that setting's default otherwise.
	$charset = defined('MAGPIE_OUTPUT_ENCODING') ? MAGPIE_OUTPUT_ENCODING : 'ISO-8859-1';

	$rss->items = array_slice($rss->items, 0, $num);
	foreach ($rss->items as $item ) {
		// Any of these fields may be absent from a feed item.
		$link = isset($item['link']) ? $item['link'] : '';
		$description = isset($item['description']) ? $item['description'] : '';
		$title = isset($item['title']) ? $item['title'] : '';

		echo "<li>\n";
		echo "<a href='" . htmlspecialchars($link, ENT_QUOTES, $charset)
			. "' title='" . htmlspecialchars($description, ENT_QUOTES, $charset) . "'>";
		echo htmlentities($title, ENT_QUOTES, $charset);
		echo "</a><br />\n";
		echo "</li>\n";
	}

	return $rss->items;
}
|
---|
| 2044 | ?> |
---|