1 | <?php
|
---|
2 | /* Project: MagpieRSS: a simple RSS integration tool
|
---|
3 | * File: A compiled file for RSS syndication
|
---|
4 | * Author: Kellan Elliot-McCrea <kellan@protest.net>
|
---|
5 | * WordPress development team <http://www.wordpress.org/>
|
---|
6 | * Charles Johnson <technophilia@radgeek.com>
|
---|
7 | * Version: 2009.0618
|
---|
8 | * License: GPL
|
---|
9 | *
|
---|
10 | * Provenance:
|
---|
11 | *
|
---|
12 | * This is a drop-in replacement for the `rss-functions.php` provided with the
|
---|
13 | * WordPress 1.5 distribution, which upgrades the version of MagpieRSS from 0.51
|
---|
14 | * to 0.8a. The update improves handling of character encoding, supports
|
---|
15 | * multiple categories for posts (using <dc:subject> or <category>), supports
|
---|
16 | * Atom 1.0, and implements many other useful features. The file is derived from
|
---|
17 | * a combination of (1) the WordPress development team's modifications to
|
---|
18 | * MagpieRSS 0.51 and (2) the latest bleeding-edge updates to the "official"
|
---|
19 | * MagpieRSS software, including Kellan's original work and some substantial
|
---|
20 | * updates by Charles Johnson. All possible through the magic of the GPL. Yay
|
---|
21 | * for free software!
|
---|
22 | *
|
---|
23 | * Differences from the main branch of MagpieRSS:
|
---|
24 | *
|
---|
25 | * 1. Everything in rss_parse.inc, rss_fetch.inc, rss_cache.inc, and
|
---|
26 | * rss_utils.inc is included in one file.
|
---|
27 | *
|
---|
28 | * 2. MagpieRSS returns the WordPress version as the user agent, rather than
|
---|
29 | * Magpie
|
---|
30 | *
|
---|
31 | * 3. class RSSCache is a modified version by WordPress developers, which
|
---|
32 | * caches feeds in the WordPress database (in the options table), rather
|
---|
33 | * than writing external files directly.
|
---|
34 | *
|
---|
35 | * 4. There are two WordPress-specific functions, get_rss() and wp_rss()
|
---|
36 | *
|
---|
37 | * Differences from the version of MagpieRSS packaged with WordPress:
|
---|
38 | *
|
---|
39 | * 1. Support for translation between multiple character encodings. Under
|
---|
40 | * PHP 5 this is very nicely handled by the XML parsing library. Under PHP
|
---|
41 | * 4 we need to do a little bit of work ourselves, using either iconv or
|
---|
42 | * mb_convert_encoding if it is not one of the (extremely limited) number
|
---|
43 | * of character sets that PHP 4's XML module can handle natively.
|
---|
44 | *
|
---|
45 | * 2. Numerous bug fixes.
|
---|
46 | *
|
---|
47 | * 3. The parser class MagpieRSS has been substantially revised to better
|
---|
48 | * support popular features such as enclosures and multiple categories,
|
---|
49 | * and to support the new Atom 1.0 IETF standard. (Atom feeds are
|
---|
50 | * normalized so as to make the data available using terminology from
|
---|
51 | * either Atom 0.3 or Atom 1.0. Atom 0.3 backward-compatibility is provided
|
---|
52 | * to allow existing software to easily begin accepting Atom 1.0 data; new
|
---|
53 | * software SHOULD NOT depend on the 0.3 terminology, but rather use the
|
---|
54 | * normalization as a convenient way to keep supporting 0.3 feeds while
|
---|
55 | * they linger in the world.)
|
---|
56 | *
|
---|
57 | * The upgraded MagpieRSS can also now handle some content constructs that
|
---|
58 | * had not been handled well by previous versions of Magpie (such as the
|
---|
59 | * use of namespaced XHTML in <xhtml:body> or <xhtml:div> elements to
|
---|
60 | * provide the full content of posts in RSS 2.0 feeds).
|
---|
61 | *
|
---|
62 | * Unlike previous versions of MagpieRSS, this version can parse multiple
|
---|
63 | * instances of the same child element in item/entry and channel/feed
|
---|
64 | * containers. This is done using simple counters next to the element
|
---|
65 | * names: the first <category> element on an RSS item, for example, can be
|
---|
66 | * found in $item['category'] (thus preserving backward compatibility); the
|
---|
67 | * second in $item['category#2'], the third in $item['category#3'], and so
|
---|
68 | * on. The number of categories applied to the item can be found in
|
---|
69 | * $item['category#']
|
---|
70 | *
|
---|
71 | * Also unlike previous versions of MagpieRSS, this version allows you to
|
---|
72 | * access the values of elements' attributes as well as the content they
|
---|
73 | * contain. This can be done using a simple syntax inspired by XPath: to
|
---|
74 | * access the type attribute of an RSS 2.0 enclosure, for example, you
|
---|
75 | * need only access `$item['enclosure@type']`. A comma-separated list of
|
---|
76 | * attributes for the enclosure element is stored in `$item['enclosure@']`.
|
---|
77 | * (This syntax interacts easily with the syntax for multiple categories;
|
---|
78 | * for example, the value of the `scheme` attribute for the fourth category
|
---|
79 | * element on a particular item is stored in `$item['category#4@scheme']`.)
|
---|
80 | *
|
---|
81 | * Note also that this implementation IS NOT backward-compatible with the
|
---|
82 | * kludges that were used to hack in support for multiple categories and
|
---|
83 | * for enclosures in upgraded versions of MagpieRSS distributed with
|
---|
84 | * previous versions of FeedWordPress. If your hacks or filter plugins
|
---|
85 | * depended on the old way of doing things... well, I warned you that they
|
---|
86 | * might not be permanent. Sorry!
|
---|
87 | */
|
---|
88 |
|
---|
// Feed-type identifiers stored in MagpieRSS::$feed_type.
define('RSS', 'RSS');
define('ATOM', 'Atom');

################################################################################
## WordPress: make some settings WordPress-appropriate #########################
################################################################################

// Report the WordPress version as the user agent, rather than Magpie.
define('MAGPIE_USER_AGENT', 'WordPress/' . $wp_version . '(+http://www.wordpress.org)');

// Output encoding: the blog's configured charset when that setting is
// non-empty, otherwise ISO-8859-1.
$wp_encoding = get_settings('blog_charset');
if ($wp_encoding) {
    define('MAGPIE_OUTPUT_ENCODING', $wp_encoding);
} else {
    define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1');
}
|
---|
100 |
|
---|
101 | ################################################################################
|
---|
102 | ## rss_parse.inc: from MagpieRSS 0.85 ##########################################
|
---|
103 | ################################################################################
|
---|
104 |
|
---|
105 | /**
|
---|
106 | * Hybrid parser, and object, takes RSS as a string and returns a simple object.
|
---|
107 | *
|
---|
108 | * see: rss_fetch.inc for a simpler interface with integrated caching support
|
---|
109 | *
|
---|
110 | */
|
---|
111 | class MagpieRSS {
|
---|
var $parser;

var $current_item = array(); // item currently being parsed
var $items = array(); // collection of parsed items
var $channel = array(); // hash of channel fields
var $textinput = array();
var $image = array();
var $feed_type;
var $feed_version;
var $encoding = ''; // output encoding of parsed rss

var $_source_encoding = ''; // only set if we have to parse xml prolog

var $ERROR = "";
var $WARNING = "";

// define some constants

// Maps well-known namespace URIs to the short canonical name under which
// their elements are filed, regardless of the prefix a given feed chose.
var $_XMLNS_FAMILIAR = array (
    'http://www.w3.org/2005/Atom' => 'atom' /* 1.0 */,
    'http://purl.org/atom/ns#' => 'atom' /* pre-1.0 */,
    'http://purl.org/rss/1.0/' => 'rss' /* 1.0 */,
    'http://backend.userland.com/RSS2' => 'rss' /* 2.0 */,
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
    'http://www.w3.org/1999/xhtml' => 'xhtml',
    'http://purl.org/dc/elements/1.1/' => 'dc',
    'http://purl.org/dc/terms/' => 'dcterms',
    'http://purl.org/rss/1.0/modules/content/' => 'content',
    'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
    'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
    'http://purl.org/rss/1.0/modules/dc/' => 'dc',
    'http://wellformedweb.org/CommentAPI/' => 'wfw',
    'http://webns.net/mvcb/' => 'admin',
    'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
    'http://xmlns.com/foaf/0.1/' => 'foaf',
    'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
    'http://web.resource.org/cc/' => 'cc',
    // Media RSS: the official namespace URI ends in a slash; the slash-less
    // form appears in older feeds, so both are recognized.
    'http://search.yahoo.com/mrss' => 'media',
    'http://search.yahoo.com/mrss/' => 'media',
);

// Per canonical namespace and element: which attributes hold URIs that must
// be resolved against xml:base. The pseudo-keys '*xml' and '*html' mark
// content constructs (the start-tag handler records the base URI on elements
// flagged '*xml'); '*content' marks elements whose text content is itself a
// URI to resolve.
var $_XMLBASE_RESOLVE = array (
    // Atom 0.3 and 1.0 xml:base support
    'atom' => array (
        'link' => array ('href' => true),
        'content' => array ('src' => true, '*xml' => true, '*html' => true),
        'summary' => array ('*xml' => true, '*html' => true),
        'title' => array ('*xml' => true, '*html' => true),
        'rights' => array ('*xml' => true, '*html' => true),
        'subtitle' => array ('*xml' => true, '*html' => true),
        'info' => array('*xml' => true, '*html' => true),
        'tagline' => array('*xml' => true, '*html' => true),
        'copyright' => array ('*xml' => true, '*html' => true),
        'generator' => array ('uri' => true, 'url' => true),
        'uri' => array ('*content' => true),
        'url' => array ('*content' => true),
        'icon' => array ('*content' => true),
        'logo' => array ('*content' => true),
    ),

    // for inline namespaced XHTML
    'xhtml' => array (
        'a' => array ('href' => true),
        'applet' => array('codebase' => true),
        'area' => array('href' => true),
        'blockquote' => array('cite' => true),
        'body' => array('background' => true),
        'del' => array('cite' => true),
        'form' => array('action' => true),
        'frame' => array('longdesc' => true, 'src' => true),
        'iframe' => array('longdesc' => true, 'iframe' => true, 'src' => true),
        'head' => array('profile' => true),
        'img' => array('longdesc' => true, 'src' => true, 'usemap' => true),
        'input' => array('src' => true, 'usemap' => true),
        'ins' => array('cite' => true),
        'link' => array('href' => true),
        'object' => array('classid' => true, 'codebase' => true, 'data' => true, 'usemap' => true),
        'q' => array('cite' => true),
        'script' => array('src' => true),
    ),
);

// Atom elements whose content is captured as a (possibly marked-up) string.
var $_ATOM_CONTENT_CONSTRUCTS = array(
    'content', 'summary', 'title', /* common */
    'info', 'tagline', 'copyright', /* Atom 0.3 */
    'rights', 'subtitle', /* Atom 1.0 */
);
// XHTML elements that open an inline content construct.
var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div');
var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');

// parser variables, useless if you're not a parser, treat as private
var $stack = array('element' => array (), 'ns' => array (), 'xmlns' => array (), 'xml:base' => array ()); // stack of XML data

var $inchannel = false;
var $initem = false;

var $incontent = array(); // non-empty if in namespaced XML content field
var $xml_escape = false; // true when accepting namespaced XML
var $exclude_top = false; // true when Atom 1.0 type="xhtml"

var $intextinput = false;
var $inimage = false;
var $root_namespaces = array();
var $current_namespace = false;
var $working_namespace_table = array();
|
---|
215 |
|
---|
216 | /**
|
---|
217 | * Set up XML parser, parse source, and return populated RSS object.
|
---|
218 | *
|
---|
219 | * @param string $source string containing the RSS to be parsed
|
---|
220 | *
|
---|
221 | * NOTE: Probably a good idea to leave the encoding options alone unless
|
---|
222 | * you know what you're doing as PHP's character set support is
|
---|
223 | * a little weird.
|
---|
224 | *
|
---|
225 | * NOTE: A lot of this is unnecessary but harmless with PHP5
|
---|
226 | *
|
---|
227 | *
|
---|
228 | * @param string $output_encoding output the parsed RSS in this character
|
---|
229 | * set defaults to ISO-8859-1 as this is PHP's
|
---|
230 | * default.
|
---|
231 | *
|
---|
232 | * NOTE: might be changed to UTF-8 in future
|
---|
233 | * versions.
|
---|
234 | *
|
---|
235 | * @param string $input_encoding the character set of the incoming RSS source.
|
---|
236 | * Leave blank and Magpie will try to figure it
|
---|
237 | * out.
|
---|
238 | *
|
---|
239 | *
|
---|
240 | * @param bool $detect_encoding if false Magpie won't attempt to detect
|
---|
241 | * source encoding. (caveat emptor)
|
---|
242 | *
|
---|
243 | */
|
---|
function __construct ($source, $output_encoding='ISO-8859-1',
    $input_encoding=null, $detect_encoding=true, $base_uri=null)
{
    # if PHP xml isn't compiled in, die
    #
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
            "http://www.php.net/manual/en/ref.xml.php",
            E_USER_ERROR );
        // Nothing below can work without the extension; stop here in case
        // error() did not halt execution.
        return;
    }

    list($parser, $source) = $this->create_parser($source,
        $output_encoding, $input_encoding, $detect_encoding);

    // PHP 8 represents XML parsers as XMLParser objects rather than
    // resources, so accept either form.
    if (!is_resource($parser) and !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
            "http://www.php.net/manual/en/ref.xml.php",
            E_USER_ERROR );
        return;
    }

    $this->parser = $parser;

    # pass in parser, and a reference to this object
    # setup handlers
    #
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
        'feed_start_element', 'feed_end_element' );

    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    // Seed the xml:base stack with the caller-supplied base URI (may be
    // null); relative URIs in the feed are resolved against it.
    $this->stack['xml:base'] = array($base_uri);

    $status = xml_parse( $this->parser, $source );

    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            $xml_error = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col = xml_get_current_column_number($this->parser);
            $errormsg = "$xml_error at line $error_line, column $error_col";

            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );

    // Normalize whatever was parsed, even after a parse error.
    $this->normalize();
}

/**
 * PHP 4-style constructor, kept so that PHP 4 and any code calling
 * MagpieRSS() explicitly keep working. PHP 8 no longer treats a method
 * named after its class as a constructor, so the real work lives in
 * __construct() and this simply forwards to it.
 *
 * Parameters are identical to those of __construct().
 */
function MagpieRSS ($source, $output_encoding='ISO-8859-1',
    $input_encoding=null, $detect_encoding=true, $base_uri=null)
{
    $this->__construct($source, $output_encoding, $input_encoding,
        $detect_encoding, $base_uri);
}
|
---|
297 |
|
---|
/**
 * Start-tag handler for the XML parser (registered by the constructor).
 *
 * Maintains the xml:base and namespace stacks, identifies the feed type from
 * the root element, flattens tags inside content constructs back into text,
 * and records element attributes using the `element@attr` key syntax.
 *
 * @param mixed  $p          the XML parser invoking the handler (unused)
 * @param string $element    element name as reported by the parser, possibly prefixed
 * @param array  $attributes attribute name => value; updated in place with
 *                           resolved URIs and, where needed, xml:base/xmlns entries
 */
function feed_start_element($p, $element, &$attributes) {
    $el = strtolower($element);

    // Start from the namespace table and base URI of the parent element.
    $namespaces = end($this->stack['xmlns']);
    if (!is_array($namespaces)) {
        // end() returns false on an empty stack (the root element); begin
        // with an empty table instead of writing array keys into false.
        $namespaces = array();
    }
    $baseuri = end($this->stack['xml:base']);

    if (isset($attributes['xml:base'])) {
        $baseuri = Relative_URI::resolve($attributes['xml:base'], $baseuri);
    }
    array_push($this->stack['xml:base'], $baseuri);

    // scan for xml namespace declarations. ugly ugly ugly.
    // theoretically we could use xml_set_start_namespace_decl_handler and
    // xml_set_end_namespace_decl_handler to handle this more elegantly, but
    // support for these is buggy
    foreach ($attributes as $attr => $value) {
        if ( preg_match('/^xmlns(\:([A-Z_a-z].*))?$/', $attr, $match) ) {
            // A bare xmlns="..." declares the default namespace, keyed by ''.
            $ns = (isset($match[2]) ? $match[2] : '');
            $namespaces[$ns] = $value;
        }
    }

    array_push($this->stack['xmlns'], $namespaces);

    // check for a namespace, and split if found
    // Don't munge content tags
    $ns = $this->namespace($element);
    if ( empty($this->incontent) ) {
        $el = strtolower($ns['element']);
        $this->current_namespace = $ns['effective'];
        array_push($this->stack['ns'], $ns['effective']);
    }

    // Resolve URI-valued attributes against the current base URI.
    $nsc = $ns['canonical']; $nse = $ns['element'];
    if ( isset($this->_XMLBASE_RESOLVE[$nsc][$nse]) ) {
        if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*xml'])) {
            $attributes['xml:base'] = $baseuri;
        }
        foreach ($attributes as $key => $value) {
            if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse][strtolower($key)])) {
                $attributes[$key] = Relative_URI::resolve($attributes[$key], $baseuri);
            }
        }
    }

    $attrs = array_change_key_case($attributes, CASE_LOWER);

    # if feed type isn't set, then this is first element of feed
    # identify feed from root element
    #
    if (!isset($this->feed_type) ) {
        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->root_namespaces = array('rss', 'rdf');
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            $this->root_namespaces = array('rss');
            // The version attribute may be missing from sloppy feeds.
            $this->feed_version = (isset($attrs['version']) ? $attrs['version'] : null);
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;
            $this->root_namespaces = array('atom');
            if ($ns['uri'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0
                $this->feed_version = '1.0';
            }
            else { // Atom 0.3, probably.
                $this->feed_version = (isset($attrs['version']) ? $attrs['version'] : null);
            }
            $this->inchannel = true;
        }
        return;
    }

    // if we're inside a namespaced content construct, treat tags as text
    if ( !empty($this->incontent) )
    {
        if ((count($this->incontent) > 1) or !$this->exclude_top) {
            if ($ns['effective']=='xhtml') {
                $tag = $ns['element'];
            }
            else {
                $tag = $element;
                $xmlns = 'xmlns';
                if (strlen($ns['prefix'])>0) {
                    $xmlns = $xmlns . ':' . $ns['prefix'];
                }
                $attributes[$xmlns] = $ns['uri']; // make sure it's visible
            }

            // if tags are inlined, then flatten
            $attrs_str = join(' ',
                array_map(array($this, 'map_attrs'),
                    array_keys($attributes),
                    array_values($attributes) )
            );

            if (strlen($attrs_str) > 0) { $attrs_str = ' '.$attrs_str; }
            $this->append_content( "<{$tag}{$attrs_str}>" );
        }
        array_push($this->incontent, $ns); // stack for parsing content XML
    }

    elseif ( $el == 'channel' ) {
        $this->inchannel = true;
    }

    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }

    // if we're in the default namespace of an RSS feed,
    // record textinput or image fields
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        $this->intextinput = true;
    }

    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }

    // set stack[0] to current element
    else {
        // Atom support many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.

        $atom_link = false;
        if ( ($ns['canonical']=='atom') and $el == 'link') {
            $atom_link = true;
            if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') {
                $el = $el . "_" . $attrs['rel']; // pseudo-element names for Atom link elements
            }
        }
        # handle atom content constructs
        elseif ( ($ns['canonical']=='atom') and in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            // assume that everything accepts namespaced XML
            // (that will pass through some non-validating feeds;
            // but so what? this isn't a validating parser)
            $this->incontent = array();
            array_push($this->incontent, $ns); // start a stack

            $this->xml_escape = $this->accepts_namespaced_xml($attrs);

            if ( isset($attrs['type']) and trim(strtolower($attrs['type']))=='xhtml') {
                $this->exclude_top = true;
            } else {
                $this->exclude_top = false;
            }
        }
        # Handle inline XHTML body elements --CWJ
        elseif ($ns['effective']=='xhtml' and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS)) {
            $this->current_namespace = 'xhtml';
            $this->incontent = array();
            array_push($this->incontent, $ns); // start a stack

            $this->xml_escape = true;
            $this->exclude_top = false;
        }

        array_unshift($this->stack['element'], $el);
        $elpath = join('_', array_reverse($this->stack['element']));

        // Count repeated siblings; the second and later occurrences are
        // stored under "name#2", "name#3", and so on.
        $n = $this->element_count($elpath);
        $this->element_count($elpath, $n+1);

        if ($n > 0) {
            array_shift($this->stack['element']);
            array_unshift($this->stack['element'], $el.'#'.($n+1));
            $elpath = join('_', array_reverse($this->stack['element']));
        }

        // this makes the baby Jesus cry, but we can't do it in normalize()
        // because we've made the element name for Atom links unpredictable
        // by tacking on the relation to the end. -CWJ
        if ($atom_link and isset($attrs['href'])) {
            $this->append($elpath, $attrs['href']);
        }

        // add attributes
        if (count($attrs) > 0) {
            $this->append($elpath.'@', join(',', array_keys($attrs)));
            foreach ($attrs as $attr => $value) {
                $this->append($elpath.'@'.$attr, $value);
            }
        }
    }
}
|
---|
505 |
|
---|
/**
 * Character-data handler for the XML parser.
 *
 * Outside a content construct the text is appended to the field named by the
 * current element path. Inside one it is appended to the content being
 * collected, HTML-escaped first when $this->xml_escape is set.
 *
 * @param mixed  $p    the XML parser invoking the handler (unused)
 * @param string $text the chunk of character data
 */
function feed_cdata ($p, $text) {
    if (!$this->incontent) {
        $path = join('_', array_reverse($this->stack['element']));
        $this->append($path, $text);
        return;
    }

    $chunk = $text;
    if ($this->xml_escape) {
        $chunk = htmlspecialchars($text, ENT_COMPAT, $this->encoding);
    }
    $this->append_content( $chunk );
}
|
---|
515 |
|
---|
/**
 * End-tag handler for the XML parser.
 *
 * Closes flattened tags inside content constructs, files a completed
 * item/entry into $this->items, clears the in-channel/in-image/in-textinput
 * flags, resolves URI-valued element content against xml:base, and unwinds
 * the element, namespace and xml:base stacks.
 *
 * @param mixed  $p  the XML parser invoking the handler (unused)
 * @param string $el element name as reported by the parser, possibly prefixed
 */
function feed_end_element ($p, $el) {
    $closer = $this->namespace($el);

    if ( $this->incontent ) {
        $opener = array_pop($this->incontent);

        // balance tags properly
        // note: i don't think this is actually necessary
        if ($opener != $closer) {
            array_push($this->incontent, $opener);
            $this->append_content("<$el />");
        } elseif ($this->incontent) { // are we in the content construct still?
            if ((count($this->incontent) > 1) or !$this->exclude_top) {
                if ($closer['effective']=='xhtml') {
                    $tag = $closer['element'];
                }
                else {
                    $tag = $el;
                }
                $this->append_content("</$tag>");
            }
        } else { // if we're done with the content construct, shift the opening of the content construct off the normal stack
            array_shift( $this->stack['element'] );
        }
    }
    elseif ($closer['effective'] == '') {
        $name = strtolower($closer['element']);
        if ( $name == 'item' or $name == 'entry' ) {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
            $this->current_category = 0;
        }
        elseif ($this->feed_type == RSS and $name == 'textinput' ) {
            $this->intextinput = false;
        }
        elseif ($this->feed_type == RSS and $name == 'image' ) {
            $this->inimage = false;
        }
        elseif ($name == 'channel' or $name == 'feed' ) {
            $this->inchannel = false;
        } else {
            // Look the element up by its lower-cased name, matching the
            // branch below and the lower-cased names used for storage.
            $nsc = $closer['canonical'];
            if (isset($this->_XMLBASE_RESOLVE[$nsc][$name]['*content'])) {
                // Resolve relative URI in content of tag
                $this->dereference_current_element();
            }
            array_shift( $this->stack['element'] );
        }
    } else {
        $nsc = $closer['canonical']; $nse = strtolower($closer['element']);
        if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) {
            // Resolve relative URI in content of tag
            $this->dereference_current_element();
        }
        array_shift( $this->stack['element'] );
    }

    if ( !$this->incontent ) { // Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ
        $this->current_namespace = array_pop($this->stack['ns']);
    }
    array_pop($this->stack['xmlns']);
    array_pop($this->stack['xml:base']);
}
|
---|
580 |
|
---|
581 | // Namespace handling functions
|
---|
/**
 * Split a possibly prefixed element name and work out which namespace it
 * belongs to, using the namespace table on top of the xmlns stack.
 *
 * @param string $element element name, e.g. "dc:subject" or "title"
 * @return array with keys:
 *   'prefix'    - the prefix as written in the feed ('' when there is none)
 *   'element'   - the local name without the prefix
 *   'uri'       - the namespace URI bound to the prefix, or null if undeclared
 *   'canonical' - the familiar short name for the URI when it is listed in
 *                 $_XMLNS_FAMILIAR, else the URI itself, else the raw prefix
 *   'effective' - '' when the canonical name is one of the feed's root
 *                 namespaces, otherwise the canonical name
 */
function namespace ($element) {
    $namespaces = end($this->stack['xmlns']);
    $ns = '';
    // strpos() returning 0 (a leading colon) is deliberately treated the
    // same as "no colon": such a name is left unsplit.
    if ( strpos( $element, ':' ) ) {
        // explode() replaces split(), which was removed in PHP 7.0; the
        // delimiter is a literal colon, so the result is the same.
        list($ns, $element) = explode( ':', $element, 2);
    }

    $uri = (isset($namespaces[$ns]) ? $namespaces[$ns] : null);

    if (!is_null($uri)) {
        $canonical = (
            isset($this->_XMLNS_FAMILIAR[$uri])
            ? $this->_XMLNS_FAMILIAR[$uri]
            : $uri
        );
    } else {
        $canonical = $ns;
    }

    if (in_array($canonical, $this->root_namespaces)) {
        $effective = '';
    } else {
        $effective = $canonical;
    }

    return array('effective' => $effective, 'canonical' => $canonical, 'prefix' => $ns, 'uri' => $uri, 'element' => $element);
}
|
---|
609 |
|
---|
610 | // Utility functions for accessing data structure
|
---|
611 |
|
---|
612 | // for smart, namespace-aware methods...
|
---|
/**
 * Apply an accessor method to the storage slot for an element, choosing the
 * slot from the parser state (item, channel, textinput or image) and from
 * the current namespace.
 *
 * The slot is handed to the accessor by reference, so it is created if it
 * does not exist yet, even when the accessor only reads it.
 *
 * @param string       $el     element path used as the storage key; nothing
 *                             happens when it is empty
 * @param string|array $method name of a method on this object taking
 *                             (&$slot, $text); or an array whose 'key' entry
 *                             names a method that turns $el into the real
 *                             storage key and whose 'value' entry names the
 *                             accessor
 * @param mixed        $text   second argument passed to the accessor
 * @return mixed whatever the accessor returned, or NULL when no slot applies
 */
function magpie_data ($el, $method, $text = NULL) {
    if (!$el) {
        return NULL;
    }

    if (is_array($method)) {
        $el = $this->{$method['key']}($el);
        $method = $method['value'];
    }

    if ( $this->current_namespace ) {
        // Namespaced data lives one level down, under the namespace name.
        $space = $this->current_namespace;
        if ( $this->initem ) {
            return $this->{$method} ( $this->current_item[ $space ][ $el ], $text );
        }
        if ( $this->inchannel ) {
            return $this->{$method} ( $this->channel[ $space ][ $el ], $text );
        }
        if ( $this->intextinput ) {
            return $this->{$method} ( $this->textinput[ $space ][ $el ], $text );
        }
        if ( $this->inimage ) {
            return $this->{$method} ( $this->image[ $space ][ $el ], $text );
        }
        return NULL;
    }

    // Un-namespaced data: note that the channel is checked last here.
    if ( $this->initem ) {
        return $this->{$method} ( $this->current_item[ $el ], $text );
    }
    if ( $this->intextinput ) {
        return $this->{$method} ( $this->textinput[ $el ], $text );
    }
    if ( $this->inimage ) {
        return $this->{$method} ( $this->image[ $el ], $text );
    }
    if ( $this->inchannel ) {
        return $this->{$method} ( $this->channel[ $el ], $text );
    }
    return NULL;
}
|
---|
666 |
|
---|
/**
 * Append a string to a variable in place, treating an unset (or null)
 * variable as the empty string.
 *
 * @param mixed  $str1 variable to extend, passed by reference
 * @param string $str2 text to append
 */
function concat (&$str1, $str2="") {
    $existing = (isset($str1) ? $str1 : "");
    $str1 = $existing . $str2;
}
|
---|
673 |
|
---|
/**
 * Accessor for magpie_data(): return the current value of a slot.
 *
 * @param mixed $el   the slot, passed by reference
 * @param mixed $text ignored; present so all accessors share one signature
 * @return mixed the slot's value
 */
function retrieve_value (&$el, $text /*ignore*/) {
    $value = $el;
    return $value;
}
|
---|
/**
 * Accessor for magpie_data(): overwrite a slot with a new value.
 *
 * @param mixed $el   the slot, passed by reference
 * @param mixed $text the value to store
 */
function replace_value (&$el, $text) {
    $el = $text;
}
|
---|
/**
 * Build the storage key that holds the occurrence count for an element:
 * the element path followed by '#'.
 *
 * @param string $el element path
 * @return string the counter key
 */
function counter_key ($el) {
    return "{$el}#";
}
|
---|
683 |
|
---|
684 |
|
---|
/**
 * Append text to the content construct currently being collected.
 *
 * The text is filed under the namespace of the element that opened the
 * construct (the bottom of the $incontent stack) and under the current
 * element path, in the current item if one is open, otherwise in the
 * channel. Nothing is stored when neither is open.
 *
 * @param string $text text or flattened markup to append
 */
function append_content($text) {
    $construct = reset($this->incontent);
    // reset() returns false on an empty stack; treat that as "no namespace".
    $ns = (is_array($construct) ? $construct['effective'] : '');

    // Keeping data about parent elements is necessary to
    // properly handle atom:source and its children elements
    $tag = join('_', array_reverse($this->stack['element']));

    if ( $this->initem ) {
        if ($ns) {
            $this->concat( $this->current_item[$ns][$tag], $text );
        } else {
            $this->concat( $this->current_item[$tag], $text );
        }
    }
    elseif ( $this->inchannel ) {
        // Test the same namespace value that is used as the key, exactly as
        // the item branch does, so the check and the key cannot disagree.
        if ($ns) {
            $this->concat( $this->channel[$ns][$tag], $text );
        } else {
            $this->concat( $this->channel[$tag], $text );
        }
    }
}
|
---|
708 |
|
---|
709 | // smart append - field and namespace aware
|
---|
/**
 * Append text to the field for an element; magpie_data() picks the right
 * container and namespace.
 *
 * @param string $el   element path used as the storage key
 * @param string $text text to append
 */
function append($el, $text) {
    $accessor = 'concat';
    $this->magpie_data($el, $accessor, $text);
}
|
---|
713 |
|
---|
/**
 * Replace the stored content of the current element with the same value
 * resolved against the xml:base URI on top of the stack.
 */
function dereference_current_element () {
    $path = join('_', array_reverse($this->stack['element']));
    $base_uri = end($this->stack['xml:base']);

    $stored = $this->magpie_data($path, 'retrieve_value');
    $resolved = Relative_URI::resolve($stored, $base_uri);
    $this->magpie_data($path, 'replace_value', $resolved);
}
|
---|
720 |
|
---|
721 | // smart count - field and namespace aware
|
---|
/**
 * Read, and optionally first set, the occurrence counter for an element.
 * The counter is stored under the element's counter key (see counter_key()).
 *
 * @param string   $el  element path
 * @param int|null $set when not null, the counter is set to this value first
 * @return int the counter value, or 0 when it is unset or otherwise falsy
 */
function element_count ($el, $set = NULL) {
    if (!is_null($set)) {
        $this->magpie_data($el, array('key' => 'counter_key', 'value' => 'replace_value'), $set);
    }

    $count = $this->magpie_data($el, array('key' => 'counter_key', 'value' => 'retrieve_value'));
    if ($count) {
        return $count;
    }
    return 0;
}
|
---|
729 |
|
---|
/**
 * Copy the attributes of the $i-th enclosure from one naming scheme to the
 * other (callback for normalize_element()).
 *
 * The attribute names are read from the comma-separated list stored under
 * "<id>@". Atom's href and RSS's url are swapped for one another, and the
 * value of that attribute also becomes the value of the destination element.
 * Every other attribute is copied under its own name.
 *
 * @param array  $source Array to read from (by reference).
 * @param string $from   Source element name ('link_enclosure' or 'enclosure').
 * @param array  $dest   Array to write to (by reference).
 * @param string $to     Destination element name.
 * @param int    $i      1-based index of the element.
 */
function normalize_enclosure (&$source, $from, &$dest, $to, $i) {
    $src_id = $this->element_id($from, $i);
    $dst_id = $this->element_id($to, $i);

    $list_key = "{$src_id}@";
    if (!isset($source[$list_key])) {
        return;
    }

    foreach (explode(',', $source[$list_key]) as $attr) {
        $value = $source["{$src_id}@{$attr}"];

        if ($from == 'link_enclosure' and $attr == 'href') { // from Atom
            $renamed = 'url';
        } elseif ($from == 'enclosure' and $attr == 'url') { // from RSS
            $renamed = 'href';
        } else {
            $renamed = null;
        }

        if (is_null($renamed)) {
            $dest["{$dst_id}@{$attr}"] = $value;
        } else {
            $dest["{$dst_id}@{$renamed}"] = $value;
            $dest[$dst_id] = $value;
        }
    }
}
|
---|
749 |
|
---|
/**
 * Normalize the $i-th Atom person construct (callback for
 * normalize_element()).
 *
 * Copies the person's URI between the Atom 1.0 name (uri) and the Atom 0.3
 * name (url), and builds a single "Name <email>" style string that is stored
 * both on the person element itself and on the destination element.
 *
 * @param array  $source Array holding the person fields (by reference).
 * @param string $person Person element name.
 * @param array  $dest   Array receiving the normalized string (by reference).
 * @param string $to     Destination element name.
 * @param int    $i      1-based index of the element.
 */
function normalize_atom_person (&$source, $person, &$dest, $to, $i) {
    $from_id = $this->element_id($person, $i);
    $to_id = $this->element_id($to, $i);

    // Atom 0.3 <=> Atom 1.0
    $is_1_0 = ($this->feed_version >= 1.0);
    $native = $is_1_0 ? 'uri' : 'url';
    $alias = $is_1_0 ? 'url' : 'uri';

    $native_key = "{$from_id}_{$native}";
    if (isset($source[$native_key])) {
        $dest["{$to_id}_{$alias}"] = $source[$native_key];
    }

    // Atom to RSS 2.0 and Dublin Core
    // RSS 2.0 person strings should be valid e-mail addresses if possible.
    $email_key = "{$from_id}_email";
    $name_key = "{$from_id}_name";
    $has_email = isset($source[$email_key]);
    $has_name = isset($source[$name_key]);

    if (!$has_email and !$has_name) {
        return;
    }

    if ($has_name) {
        $label = $source[$name_key];
        $label .= $has_email ? (' <' . $source[$email_key] . '>') : '';
    } else {
        $label = $source[$email_key];
    }

    $source[$from_id] = $label; // goes to top-level author or contributor
    $dest[$to_id] = $label;     // goes to dc:creator or dc:contributor
}
|
---|
776 |
|
---|
777 | // Normalize Atom 1.0 and RSS 2.0 categories to Dublin Core...
|
---|
/**
 * Normalize the $i-th category element and copy its identifier to the
 * destination element (callback for normalize_element()).
 *
 * @param array  $source Array holding the category (by reference).
 * @param string $from   Category element name.
 * @param array  $dest   Array receiving the subject (by reference).
 * @param string $to     Destination element name.
 * @param int    $i      1-based index of the element.
 */
function normalize_category (&$source, $from, &$dest, $to, $i) {
    $category = $this->element_id($from, $i);
    $subject = $this->element_id($to, $i);

    $term = "{$category}@term";
    $scheme = "{$category}@scheme";
    $domain = "{$category}@domain";

    // Category identifier: the term attribute wins over the element value;
    // for RSS feeds the element value is mirrored into the term attribute.
    if ( isset($source[$term]) ) {
        $source[$category] = $source[$term];
    } elseif ( $this->feed_type == RSS ) {
        $source[$term] = $source[$category];
    }

    // URI to taxonomy: keep the scheme and domain attributes in step.
    if ( isset($source[$scheme]) ) {
        $source[$domain] = $source[$scheme];
    } elseif ( isset($source[$domain]) ) {
        $source[$scheme] = $source[$domain];
    }

    // Now put the identifier into the destination (dc:subject)
    $dest[$subject] = $source[$category];
}
|
---|
798 |
|
---|
799 | // ... or vice versa
|
---|
/**
 * Copy the $i-th subject to a category element, both as the element value
 * and as its term attribute (callback for normalize_element()).
 *
 * @param array  $source Array holding the subject (by reference).
 * @param string $from   Subject element name.
 * @param array  $dest   Array receiving the category (by reference).
 * @param string $to     Category element name.
 * @param int    $i      1-based index of the element.
 */
function normalize_dc_subject (&$source, $from, &$dest, $to, $i) {
    $subject_id = $this->element_id($from, $i);
    $category_id = $this->element_id($to, $i);

    $subject = $source[$subject_id];

    $dest[$category_id] = $subject;           // RSS 2.0
    $dest["{$category_id}@term"] = $subject;  // Atom 1.0
}
|
---|
807 |
|
---|
808 | // simplify the logic for normalize(). Makes sure that count of elements and
|
---|
809 | // each of multiple elements is normalized properly. If you need to mess
|
---|
810 | // with things like attributes or change formats or the like, pass it a
|
---|
811 | // callback to handle each element.
|
---|
/**
 * Copy every occurrence of one element to another element name.
 *
 * Nothing happens unless the source holds the element or its "#" counter.
 * When a counter exists it is copied to the destination and drives the
 * number of occurrences handled; otherwise exactly one is handled.
 *
 * @param array       $source Array to read from (by reference).
 * @param string      $from   Source element name.
 * @param array       $dest   Array to write to (by reference).
 * @param string      $to     Destination element name.
 * @param string|null $via    Name of a method of this object called as
 *                            ($source, $from, $dest, $to, $i) for each
 *                            occurrence instead of a plain copy.
 */
function normalize_element (&$source, $from, &$dest, $to, $via = NULL) {
    $counter_key = "{$from}#";
    $has_counter = isset($source[$counter_key]);

    if (!$has_counter and !isset($source[$from])) {
        return;
    }

    $n = 1;
    if ($has_counter) {
        $n = $source[$counter_key];
        $dest["{$to}#"] = $source[$counter_key];
    }

    for ($i = 1; $i <= $n; $i++) {
        if (is_null($via)) { // just make it the same
            $from_id = $this->element_id($from, $i);
            $to_id = $this->element_id($to, $i);
            $dest[$to_id] = $source[$from_id];
            continue;
        }

        // custom callback
        $this->{$via}($source, $from, $dest, $to, $i);
    }
}
|
---|
832 |
|
---|
/**
 * Cross-populate equivalent Atom, RSS and Dublin Core fields once a feed has
 * been parsed, and give each item a date_timestamp when a date parses.
 *
 * For Atom feeds: Atom 0.3 and 1.0 element names are mirrored onto each
 * other, persons are copied to dc:creator / dc:contributor, Atom fields are
 * copied to their RSS names, and entries without authors inherit them from
 * their atom:source or from the feed. For RSS feeds: RSS fields are copied
 * to their Atom names. Categories and dc:subject are mirrored both ways.
 */
function normalize () {
    // if atom populate rss fields and normalize 0.3 and 1.0 feeds
    if ( $this->is_atom() ) {
        $pre_1_0 = ($this->feed_version < 1.0);

        // Atom 0.3 name => Atom 1.0 name; copied from whichever side the
        // feed's own version uses to the other one.
        $feed_renames = array(
            'tagline' => 'subtitle',
            'copyright' => 'rights',
            'modified' => 'updated',
        );
        foreach ($feed_renames as $old => $new) {
            if ($pre_1_0) {
                $this->normalize_element($this->channel, $old, $this->channel, $new);
            } else {
                $this->normalize_element($this->channel, $new, $this->channel, $old);
            }
        }
        $this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person');
        $this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person');

        // Atom elements to RSS elements
        $this->normalize_element($this->channel, 'subtitle', $this->channel, 'description');

        if ( isset($this->channel['logo']) ) {
            $this->normalize_element($this->channel, 'logo', $this->image, 'url');
            $this->normalize_element($this->channel, 'link', $this->image, 'link');
            $this->normalize_element($this->channel, 'title', $this->image, 'title');
        }

        $entry_renames = array(
            'modified' => 'updated',
            'issued' => 'published',
        );

        // The number of items does not change inside the loop.
        $item_total = count($this->items);
        for ( $i = 0; $i < $item_total; $i++ ) {
            $item = $this->items[$i];

            // Atom 1.0 elements <=> Atom 0.3 elements
            foreach ($entry_renames as $old => $new) {
                if ($pre_1_0) {
                    $this->normalize_element($item, $old, $item, $new);
                } else {
                    $this->normalize_element($item, $new, $item, $old);
                }
            }

            // "If an atom:entry element does not contain
            // atom:author elements, then the atom:author elements
            // of the contained atom:source element are considered
            // to apply. In an Atom Feed Document, the atom:author
            // elements of the containing atom:feed element are
            // considered to apply to the entry if there are no
            // atom:author elements in the locations described
            // above." <http://atompub.org/2005/08/17/draft-ietf-atompub-format-11.html#rfc.section.4.2.1>
            if (!isset($item["author#"])) {
                if (isset($item["source_author#"])) { // from aggregation source
                    $source = $item;
                    $author = "source_author";
                } elseif (isset($this->channel["author#"])) { // from containing feed
                    $source = $this->channel;
                    $author = "author";
                } else {
                    $author = null;
                }

                if (!is_null($author)) {
                    $item["author#"] = $source["{$author}#"];
                    for ($au = 1; $au <= $item["author#"]; $au++) {
                        $id_to = $this->element_id('author', $au);
                        $id_from = $this->element_id($author, $au);

                        // A person may consist of child fields only, with
                        // no value stored for the element itself.
                        if (isset($source[$id_from])) {
                            $item[$id_to] = $source[$id_from];
                        }
                        foreach (array('name', 'email', 'uri', 'url') as $what) {
                            if (isset($source["{$id_from}_{$what}"])) {
                                $item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"];
                            }
                        }
                    }
                }
            }

            // Atom elements to RSS elements
            $this->normalize_element($item, 'author', $item['dc'], 'creator', 'normalize_atom_person');
            $this->normalize_element($item, 'contributor', $item['dc'], 'contributor', 'normalize_atom_person');
            $this->normalize_element($item, 'summary', $item, 'description');
            $this->normalize_element($item, 'atom_content', $item['content'], 'encoded');
            $this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure');

            // Categories
            if ( isset($item['category#']) ) { // Atom 1.0 categories to dc:subject and RSS 2.0 categories
                $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
            }
            elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories
                $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
            }

            // Normalized item timestamp: prefer published, fall back to
            // updated, and cope with entries that carry neither.
            if ( isset($item['published']) ) {
                $atom_date = $item['published'];
            } elseif ( isset($item['updated']) ) {
                $atom_date = $item['updated'];
            } else {
                $atom_date = null;
            }
            if ( $atom_date ) {
                $epoch = @parse_w3cdtf($atom_date);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
    elseif ( $this->is_rss() ) {
        // RSS elements to Atom elements
        $this->normalize_element($this->channel, 'description', $this->channel, 'tagline'); // Atom 0.3
        $this->normalize_element($this->channel, 'description', $this->channel, 'subtitle'); // Atom 1.0
        $this->normalize_element($this->image, 'url', $this->channel, 'logo');

        // The number of items does not change inside the loop.
        $item_total = count($this->items);
        for ( $i = 0; $i < $item_total; $i++ ) {
            $item = $this->items[$i];

            // RSS elements to Atom elements
            $this->normalize_element($item, 'description', $item, 'summary');
            $this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure');

            // Categories
            if ( isset($item['category#']) ) { // RSS 2.0 categories to dc:subject and Atom 1.0 categories
                $this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
            }
            elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories
                $this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
            }

            // Normalized item timestamp
            if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                $epoch = @parse_w3cdtf($item['dc']['date']);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }
            elseif ( isset($item['pubdate']) ) {
                $epoch = @strtotime($item['pubdate']);
                if ($epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
}
|
---|
971 |
|
---|
972 |
|
---|
/**
 * Tell whether the parsed feed is RSS.
 *
 * @return mixed The feed version when the feed type is RSS, false otherwise.
 */
function is_rss () {
    return ( $this->feed_type == RSS ) ? $this->feed_version : false;
}
|
---|
981 |
|
---|
/**
 * Tell whether the parsed feed is Atom.
 *
 * @return mixed The feed version when the feed type is ATOM, false otherwise.
 */
function is_atom() {
    return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
}
|
---|
990 |
|
---|
991 | /**
|
---|
992 | * return XML parser, and possibly re-encoded source
|
---|
993 | *
|
---|
994 | */
|
---|
function create_parser($source, $out_enc, $in_enc, $detect) {
    // Builds the XML parser for $source and returns array($parser, $source);
    // $source comes back re-encoded only when the PHP4 fallback had to
    // convert it. When $out_enc is set it is remembered in $this->encoding
    // and becomes the parser's target encoding. Case folding is switched off.
    //
    // Compare the whole version rather than its first character: the old
    // test for a leading "5" sent PHP 7 and later down the PHP4 path.
    if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }

    if ($out_enc) {
        $this->encoding = $out_enc;
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
    }
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);

    return array($parser, $source);
}
|
---|
1009 |
|
---|
1010 | /**
|
---|
1011 | * Instantiate an XML parser under PHP5
|
---|
1012 | *
|
---|
1013 | * PHP5 will do a fine job of detecting input encoding
|
---|
1014 | * if passed an empty string as the encoding.
|
---|
1015 | *
|
---|
1016 | * All hail libxml2!
|
---|
1017 | *
|
---|
1018 | */
|
---|
function php5_create_parser($in_enc, $detect) {
    // An explicit input encoding is only honoured when detection is off;
    // in every other case the parser is created with an empty encoding.
    $encoding = ($detect || !$in_enc) ? '' : $in_enc;
    return xml_parser_create($encoding);
}
|
---|
1028 |
|
---|
1029 | /**
|
---|
1030 | * Instantiate an XML parser under PHP4
|
---|
1031 | *
|
---|
1032 | * Unfortunately PHP4's support for character encodings
|
---|
1033 | * and especially XML and character encodings sucks. As
|
---|
1034 | * long as the documents you parse only contain characters
|
---|
1035 | * from the ISO-8859-1 character set (a superset of ASCII,
|
---|
1036 | * and a subset of UTF-8) you're fine. However once you
|
---|
1037 | * step out of that comfy little world things get mad, bad,
|
---|
1038 | * and dangerous to know.
|
---|
1039 | *
|
---|
1040 | * The following code is based on SJM's work with FoF
|
---|
1041 | * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
|
---|
1042 | *
|
---|
1043 | */
|
---|
function php4_create_parser($source, $in_enc, $detect) {
    // Returns array($parser, $source). $source is returned converted to
    // UTF-8 when its encoding is not one PHP's parser knows and iconv or
    // mbstring managed to convert it; otherwise it is returned untouched.
    if ( !$detect ) {
        return array(xml_parser_create($in_enc), $source);
    }

    if (!$in_enc) {
        // The "?" of the XML declaration opener must be escaped; left bare
        // it only makes the "<" optional, so the pattern could match any
        // "xml ... encoding=" text rather than the declaration itself.
        if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
            $in_enc = strtoupper($m[1]);
            $this->source_encoding = $in_enc;
        }
        else {
            $in_enc = 'UTF-8';
        }
    }

    if ($this->known_encoding($in_enc)) {
        return array(xml_parser_create($in_enc), $source);
    }

    // the detected encoding is not one of the simple encodings PHP knows

    // attempt to use the iconv extension to
    // cast the XML to a known encoding
    // @see http://php.net/iconv
    if (function_exists('iconv')) {
        $encoded_source = iconv($in_enc, 'UTF-8', $source);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // iconv didn't work, try mb_convert_encoding
    // @see http://php.net/mbstring
    if (function_exists('mb_convert_encoding')) {
        $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // nothing could convert the source: report it and parse it as it is
    $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
        "You may see strange artifacts, and mangled characters.",
        E_USER_NOTICE);

    return array(xml_parser_create(), $source);
}
|
---|
1092 |
|
---|
/**
 * Check an encoding name against $this->_KNOWN_ENCODINGS.
 *
 * @param string $enc Encoding name in any letter case.
 * @return string|false The upper-cased name when it is listed, false otherwise.
 */
function known_encoding($enc) {
    $upper = strtoupper($enc);
    return in_array($upper, $this->_KNOWN_ENCODINGS) ? $upper : false;
}
|
---|
1102 |
|
---|
/**
 * Report a parser problem and remember it on the object.
 *
 * The message is raised with trigger_error() when MAGPIE_DEBUG is truthy and
 * written with error_log() otherwise. Notice-level messages are stored in
 * $this->WARNING, all others in $this->ERROR.
 *
 * @param string $errormsg Message text.
 * @param int    $lvl      PHP error level, E_USER_WARNING by default.
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    // append PHP's error message if track_errors enabled
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    if ( !MAGPIE_DEBUG ) {
        error_log( $errormsg, 0);
    }
    else {
        trigger_error( $errormsg, $lvl);
    }

    $is_notice = ( $lvl & (E_USER_NOTICE | E_NOTICE) );
    if ( $is_notice ) {
        $this->WARNING = $errormsg;
        return;
    }
    $this->ERROR = $errormsg;
}
|
---|
1122 |
|
---|
1123 | // magic ID function for multiple elements.
|
---|
1124 | // can be called as static MagpieRSS::element_id()
|
---|
/**
 * Build the key for the $counter-th occurrence of an element.
 *
 * @param string $el      Element name.
 * @param int    $counter 1-based occurrence number.
 * @return string The bare name for counters up to 1, otherwise "name#counter".
 */
function element_id ($el, $counter) {
    if ($counter > 1) {
        return $el . '#' . $counter;
    }
    return $el . '';
}
|
---|
1128 |
|
---|
/**
 * Render one attribute as name="value", escaping the value for HTML with
 * the object's current encoding.
 *
 * @param string $k Attribute name (used as is).
 * @param string $v Attribute value.
 * @return string
 */
function map_attrs($k, $v) {
    $escaped = htmlspecialchars($v, ENT_COMPAT, $this->encoding);
    return sprintf('%s="%s"', $k, $escaped);
}
|
---|
1132 |
|
---|
/**
 * Decide from a content element's attributes whether its content is XML.
 *
 * Only Atom feeds can qualify. Before Atom 1.0 the mode must be 'xml' (the
 * default when absent) and the type must end in /html, +html, /xml or +xml.
 * From Atom 1.0 on the type must be 'xhtml' or end in /xml or +xml.
 *
 * @param array $attrs Attributes of the element; 'mode' and 'type' are read.
 * @return bool
 */
function accepts_namespaced_xml ($attrs) {
    $mode = (isset($attrs['mode']) ? trim(strtolower($attrs['mode'])) : 'xml');
    // Default to '' rather than null: it matches none of the tests below
    // either, and passing null to preg_match() is deprecated since PHP 8.1.
    $type = (isset($attrs['type']) ? trim(strtolower($attrs['type'])) : '');

    if ($this->feed_type != ATOM) {
        return false; // Don't munge unless you're sure
    }

    if ($this->feed_version < 1.0) {
        return ($mode == 'xml' and preg_match(':[/+](html|xml)$:i', $type));
    }

    if ($this->feed_version >= 1.0) {
        return ($type == 'xhtml' or preg_match(':[/+]xml$:i', $type));
    }

    return false;
}
|
---|
1153 | } // end class RSS
|
---|
1154 |
|
---|
1155 |
|
---|
1156 | // patch to support medieval versions of PHP4.1.x,
|
---|
1157 | // courtesy, Ryan Currie, ryan@digibliss.com
|
---|
1158 |
|
---|
if (!function_exists('array_change_key_case')) {
    if (!defined('CASE_UPPER')) {
        define("CASE_UPPER",1);
    }
    if (!defined('CASE_LOWER')) {
        define("CASE_LOWER",0);
    }

    /**
     * Fallback for PHP builds without array_change_key_case().
     *
     * @param array $array Array whose keys are to be converted.
     * @param int   $case  CASE_UPPER for upper case; anything else is
     *                     treated as CASE_LOWER.
     * @return array Copy of $array with converted keys; an empty array
     *               when $array is empty.
     */
    function array_change_key_case($array,$case=CASE_LOWER) {
        // Always pick a converter so an unexpected $case cannot leave it unset.
        $cmd = ($case==CASE_UPPER) ? 'strtoupper' : 'strtolower';

        // Start from an empty array so empty input does not return an
        // undefined variable.
        $output = array();
        foreach($array as $key=>$value) {
            $output[$cmd($key)]=$value;
        }
        return $output;
    }
}
|
---|
1174 |
|
---|
1175 | ################################################################################
|
---|
1176 | ## WordPress: Load in Snoopy from wp-includes ##################################
|
---|
1177 | ################################################################################
|
---|
1178 |
|
---|
// Snoopy is only loaded when wp_remote_request() is not available;
// _fetch_remote_file() uses it as its HTTP client in that case.
if (!function_exists('wp_remote_request')) {
    require_once( dirname(__FILE__) . '/class-snoopy.php');
}
|
---|
1182 |
|
---|
1183 | ################################################################################
|
---|
1184 | ## rss_fetch.inc: from MagpieRSS 0.8a ##########################################
|
---|
1185 | ################################################################################
|
---|
1186 |
|
---|
1187 | /*=======================================================================*\
|
---|
1188 | Function: fetch_rss:
|
---|
1189 | Purpose: return RSS object for the given url
|
---|
1190 | maintain the cache
|
---|
1191 | Input: url of RSS file
|
---|
1192 | Output: parsed RSS object (see rss_parse.inc)
|
---|
1193 |
|
---|
1194 | NOTES ON CACHING:
|
---|
1195 | If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
|
---|
1196 |
|
---|
1197 | NOTES ON RETRIEVING REMOTE FILES:
|
---|
1198 | If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
|
---|
1199 | return a cached object, and touch the cache object upon receiving a
|
---|
1200 | 304.
|
---|
1201 |
|
---|
1202 | NOTES ON FAILED REQUESTS:
|
---|
1203 | If there is an HTTP error while fetching an RSS object, the cached
|
---|
1204 | version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
|
---|
1205 | \*=======================================================================*/
|
---|
1206 |
|
---|
// Version of this MagpieRSS build; init() puts it into the default user agent.
define('MAGPIE_VERSION', '2009.0618');

// Last error message recorded by error(); read it through magpie_error().
$MAGPIE_ERROR = '';
|
---|
1210 |
|
---|
function fetch_rss ($url) {
    // Returns the parsed feed object for $url, or false when nothing could
    // be fetched, parsed or recovered from the cache.

    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // if cache is disabled: fetch file, and parse it
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp, $url );
        }
        error("Failed to fetch $url and cache is off");
        return false;
    }

    // else cache is ON
    // Flow
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    $cache_status = 0;          // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss = 0;                   // parsed RSS object
    $errormsg = 0;              // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // Strict comparisons: before PHP 8 the initial 0 compares loosely
    // equal to any non-numeric string, so an unusable cache would have
    // looked like both a HIT and a STALE entry.

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get: send whichever validators the stale
    // copy carries. The request header that pairs with Last-Modified is
    // If-Modified-Since.
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss ) {
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = trim($rss->etag);
            }
            if ( !empty($rss->last_modified) ) {
                $request_headers['If-Modified-Since'] = trim($rss->last_modified);
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        if ($resp->status == '304' ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            $rss = _response_to_rss( $resp, $url );
            if ( $rss ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $rss );
                return $rss;
            }
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( !empty($resp->error) ) {
                // Error texts may end in a newline; strip trailing
                // whitespace only, so no real character is lost.
                $http_error = rtrim($resp->error);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .= "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed
    debug("MagpieRSS fetch failed [$errormsg]");

    // attempt to return cached object
    if ($rss) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    error( $errormsg );

    return false;
} // end fetch_rss()
|
---|
1346 |
|
---|
1347 | /*=======================================================================*\
|
---|
1348 | Function: error
|
---|
1349 | Purpose: set MAGPIE_ERROR, and trigger error
|
---|
1350 | \*=======================================================================*/
|
---|
1351 |
|
---|
function error ($errormsg, $lvl=E_USER_WARNING) {
    // Records a non-empty message, prefixed with "MagpieRSS: ", in the
    // global $MAGPIE_ERROR and reports it: through trigger_error() at level
    // $lvl when MAGPIE_DEBUG is truthy, through error_log() otherwise.
    global $MAGPIE_ERROR;

    // append PHP's error message if track_errors enabled
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    if ( !$errormsg ) {
        return;
    }

    $errormsg = 'MagpieRSS: ' . $errormsg;
    $MAGPIE_ERROR = $errormsg;

    if ( !MAGPIE_DEBUG ) {
        error_log($errormsg, 0);
        return;
    }
    trigger_error( $errormsg, $lvl);
}
|
---|
1369 |
|
---|
/**
 * Raise a debugging message through trigger_error().
 *
 * @param string $debugmsg Message text; "MagpieRSS [debug] " is prepended.
 * @param int    $lvl      PHP error level, E_USER_NOTICE by default.
 */
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    $message = 'MagpieRSS [debug] ' . $debugmsg;
    trigger_error($message, $lvl);
}
|
---|
1373 |
|
---|
1374 | /*=======================================================================*\
|
---|
1375 | Function: magpie_error
|
---|
1376 | Purpose: accessor for the magpie error variable
|
---|
1377 | \*=======================================================================*/
|
---|
function magpie_error ($errormsg="") {
    // Returns the global $MAGPIE_ERROR, first overwriting it with
    // $errormsg when a non-empty message is passed in.
    global $MAGPIE_ERROR;

    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
|
---|
1387 |
|
---|
1388 | /*=======================================================================*\
|
---|
1389 | Function: _fetch_remote_file
|
---|
1390 | Purpose: retrieve an arbitrary remote file
|
---|
1391 | Input: url of the remote file
|
---|
1392 | headers to send along with the request (optional)
|
---|
1393 | Output: an HTTP response object (see Snoopy.class.inc)
|
---|
1394 | \*=======================================================================*/
|
---|
function _fetch_remote_file ($url, $headers = "" ) {
    // Fetches $url and returns an object exposing status, response_code and
    // either headers and results or error. wp_remote_request() is used when
    // it exists; otherwise the Snoopy client itself is returned.

    // Ensure that we have constants set up, since they are used below.
    init();

    if (!function_exists('wp_remote_request')) {
        // Snoopy is an HTTP client in PHP
        $client = new Snoopy();
        $client->agent = MAGPIE_USER_AGENT;
        $client->read_timeout = MAGPIE_FETCH_TIME_OUT;
        $client->use_gzip = MAGPIE_USE_GZIP;
        if (is_array($headers) ) {
            $client->rawheaders = $headers;
        }
        @$client->fetch($url);
        return $client;
    }

    // WordPress 2.7 has deprecated Snoopy. It's still there, for now, but
    // I'd rather not rely on it.
    $request = array(
        'headers' => $headers,
        'timeout' => MAGPIE_FETCH_TIME_OUT,
    );
    $resp = wp_remote_request($url, $request);

    if ( is_wp_error($resp) ) {
        $messages = $resp->get_error_messages();

        $client = new stdClass;
        $client->status = 500;
        $client->response_code = 500;
        $client->error = implode(" / ", $messages). "\n"; //\n = Snoopy compatibility
        return $client;
    }

    $code = $resp['response']['code'];

    $client = new stdClass;
    $client->status = $code;
    $client->response_code = $code;
    $client->headers = $resp['headers'];
    $client->results = $resp['body'];
    return $client;
}
|
---|
1434 |
|
---|
1435 | /*=======================================================================*\
|
---|
1436 | Function: _response_to_rss
|
---|
1437 | Purpose: parse an HTTP response object into an RSS object
|
---|
1438 | Input: an HTTP response object (see Snoopy)
|
---|
1439 | Output: parsed RSS object (see rss_parse)
|
---|
1440 | \*=======================================================================*/
|
---|
function _response_to_rss ($resp, $url = null) {
    // Parses $resp->results into a MagpieRSS object and copies the HTTP
    // status and response headers onto it. Returns the object, or false
    // (after reporting through error()) when parsing failed.
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING, $url );

    if ( !$rss or $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";

        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg);

        return false;
    }

    // RSS parsed successfully
    $rss->http_status = $resp->status;

    // find Etag, and Last-Modified
    $headers = isset($resp->headers) ? $resp->headers : array();
    if ( is_array($headers) or is_object($headers) ) {
        foreach ($headers as $name => $h) {
            if (is_string($name)) {
                // Headers keyed by name, as _fetch_remote_file() passes them
                // on from wp_remote_request(); a repeated header may arrive
                // as a list of values.
                $field = $name;
                $val = is_array($h) ? implode(', ', $h) : $h;
            }
            elseif (strpos($h, ": ")) {
                // Raw "Name: value" header line
                list($field, $val) = explode(": ", $h, 2);
            }
            else {
                $field = $h;
                $val = "";
            }

            $rss->header[$field] = $val;

            // Header names are case-insensitive, and raw header lines may
            // still carry their line ending.
            $lower = strtolower($field);
            if ( $lower == 'etag' ) {
                $rss->etag = trim($val);
            }
            if ( $lower == 'last-modified' ) {
                $rss->last_modified = trim($val);
            }
        }
    }

    return $rss;
}
|
---|
1483 |
|
---|
/*=======================================================================*\
	Function:	init
	Purpose:	give every MAGPIE_* configuration constant a default value
			unless the user has already defined it; runs only once
			(guarded by the MAGPIE_INITALIZED constant)
\*=======================================================================*/
function init () {
	// second and later calls are no-ops
	if ( defined('MAGPIE_INITALIZED') ) {
		return;
	}
	define('MAGPIE_INITALIZED', true);

	// constant name => default used when the user has not set it
	$defaults = array(
		'MAGPIE_CACHE_ON'         => true,
		'MAGPIE_CACHE_DIR'        => './cache',
		'MAGPIE_CACHE_AGE'        => 60*60,	// one hour
		'MAGPIE_CACHE_FRESH_ONLY' => false,
		'MAGPIE_OUTPUT_ENCODING'  => 'ISO-8859-1',
		'MAGPIE_INPUT_ENCODING'   => null,
		'MAGPIE_DETECT_ENCODING'  => true,
		'MAGPIE_DEBUG'            => 0,
	);
	foreach ( $defaults as $name => $value ) {
		if ( !defined($name) ) {
			define($name, $value);
		}
	}

	// the user agent advertises whether caching is switched off
	if ( !defined('MAGPIE_USER_AGENT') ) {
		$tail = MAGPIE_CACHE_ON ? ')' : '; No cache)';
		define('MAGPIE_USER_AGENT', 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $tail);
	}

	// these are set after the user agent, as before
	$late_defaults = array(
		'MAGPIE_FETCH_TIME_OUT' => 5,		// 5 second timeout
		'MAGPIE_USE_GZIP'       => true,	// use gzip encoding to fetch rss files if supported?
	);
	foreach ( $late_defaults as $name => $value ) {
		if ( !defined($name) ) {
			define($name, $value);
		}
	}
}
|
---|
1551 |
|
---|
1552 | // NOTE: the following code should really be in Snoopy, or at least
|
---|
1553 | // somewhere other than rss_fetch!
|
---|
1554 |
|
---|
1555 | /*=======================================================================*\
|
---|
1556 | HTTP STATUS CODE PREDICATES
|
---|
1557 | These functions attempt to classify an HTTP status code
|
---|
1558 | based on RFC 2616 and RFC 2518.
|
---|
1559 |
|
---|
1560 | All of them take an HTTP status code as input, and return true or false
|
---|
1561 |
|
---|
1562 | All this code is adapted from LWP's HTTP::Status.
|
---|
1563 | \*=======================================================================*/
|
---|
1564 |
|
---|
1565 |
|
---|
/*=======================================================================*\
	Function:	is_info
	Purpose:	return true if the status code is Informational (1xx),
			i.e. at least 100 and below 200
\*=======================================================================*/
function is_info ($sc) {
	if ( !( 100 <= $sc ) ) {
		return false;
	}
	return ( 200 > $sc );
}
|
---|
1573 |
|
---|
/*=======================================================================*\
	Function:	is_success
	Purpose:	return true if the status code is Successful (2xx),
			i.e. at least 200 and below 300
\*=======================================================================*/
function is_success ($sc) {
	if ( !( 200 <= $sc ) ) {
		return false;
	}
	return ( 300 > $sc );
}
|
---|
1581 |
|
---|
/*=======================================================================*\
	Function:	is_redirect
	Purpose:	return true if the status code is a Redirection (3xx),
			i.e. at least 300 and below 400
\*=======================================================================*/
function is_redirect ($sc) {
	if ( !( 300 <= $sc ) ) {
		return false;
	}
	return ( 400 > $sc );
}
|
---|
1589 |
|
---|
/*=======================================================================*\
	Function:	is_error
	Purpose:	return true if the status code is an Error of either
			kind (4xx or 5xx), i.e. at least 400 and below 600
\*=======================================================================*/
function is_error ($sc) {
	if ( !( 400 <= $sc ) ) {
		return false;
	}
	return ( 600 > $sc );
}
|
---|
1597 |
|
---|
/*=======================================================================*\
	Function:	is_client_error
	Purpose:	return true if the status code is a client Error (4xx),
			i.e. at least 400 and below 500
\*=======================================================================*/
function is_client_error ($sc) {
	if ( !( 400 <= $sc ) ) {
		return false;
	}
	return ( 500 > $sc );
}
|
---|
1605 |
|
---|
/*=======================================================================*\
	Function:	is_server_error
	Purpose:	return true if the status code is a server Error (5xx),
			i.e. at least 500 and below 600
\*=======================================================================*/
function is_server_error ($sc) {
	if ( !( 500 <= $sc ) ) {
		return false;
	}
	return ( 600 > $sc );
}
|
---|
1613 |
|
---|
1614 | ################################################################################
|
---|
1615 | ## rss_cache.inc: from WordPress 1.5 ###########################################
|
---|
1616 | ################################################################################
|
---|
1617 |
|
---|
/*=======================================================================*\
	Class:		RSSCache
	Purpose:	cache parsed feeds in WordPress options. Each feed URL
			maps to two options: 'rss_<md5(url)>' holding the feed
			and 'rss_<md5(url)>_ts' holding the time it was stored.
\*=======================================================================*/
class RSSCache {
	var $BASE_CACHE = 'wp-content/cache';	// cache directory setting; assigned by the constructor but not read inside this class
	var $MAX_AGE	= 43200;		// when are entries stale, default twelve hours (in seconds)
	var $ERROR	= '';			// most recent error message

/*=======================================================================*\
	Function:	__construct
	Purpose:	create a cache object, optionally overriding the defaults
	Input:		$base - cache location; empty keeps the default
			$age  - maximum age in seconds; empty keeps the default
\*=======================================================================*/
	function __construct ($base='', $age='') {
		if ( $base ) {
			$this->BASE_CACHE = $base;
		}
		if ( $age ) {
			$this->MAX_AGE = $age;
		}
	}

/*=======================================================================*\
	Function:	RSSCache
	Purpose:	PHP 4 style constructor. PHP 8 no longer runs a method
			named after its class on "new", so the real work lives
			in __construct(); this forwarder stays for any code that
			calls the method explicitly.
\*=======================================================================*/
	function RSSCache ($base='', $age='') {
		$this->__construct($base, $age);
	}

/*=======================================================================*\
	Function:	set
	Purpose:	add an item to the cache, keyed on url
	Input:		url from which the rss file was fetched, and the object
			to store
	Output:		the name of the option the item was stored under
\*=======================================================================*/
	function set ($url, $rss) {
		global $wpdb;
		$key = $this->file_name( $url );
		$cache_option	= 'rss_' . $key;
		$cache_timestamp = 'rss_' . $key . '_ts';

		// create both options first if they do not exist yet; the names are
		// 'rss_' plus an md5 hex digest, so they are safe inside the query.
		// (the 'no' argument is presumably WordPress's autoload flag)
		if ( !$wpdb->get_var("SELECT option_name FROM $wpdb->options WHERE option_name = '$cache_option'") )
			add_option($cache_option, '', '', 'no');
		if ( !$wpdb->get_var("SELECT option_name FROM $wpdb->options WHERE option_name = '$cache_timestamp'") )
			add_option($cache_timestamp, '', '', 'no');

		update_option($cache_option, $rss);
		update_option($cache_timestamp, time() );

		return $cache_option;
	}

/*=======================================================================*\
	Function:	get
	Purpose:	fetch an item from the cache
	Input:		url from which the rss file was fetched
	Output:		cached object on HIT, 0 (a false value) on MISS
\*=======================================================================*/
	function get ($url) {
		$this->ERROR = "";
		$cache_option = 'rss_' . $this->file_name( $url );

		$rss = get_option( $cache_option );
		if ( ! $rss ) {
			$this->debug(
				"Cache doesn't contain: $url (cache option: $cache_option)"
			);
			return 0;
		}

		// failsafe; seems to break at odd points in WP MU
		if (is_string($rss)) {
			$rss = $this->unserialize($rss);
		}

		return $rss;
	}

/*=======================================================================*\
	Function:	check_cache
	Purpose:	check a url for membership in the cache
			and whether the object is older than MAX_AGE (ie. STALE)
	Input:		url from which the rss file was fetched
	Output:		'HIT' when cached and current, 'STALE' when cached but
			too old, 'MISS' when no timestamp is stored
\*=======================================================================*/
	function check_cache ( $url ) {
		$this->ERROR = "";
		$cache_timestamp = 'rss_' . $this->file_name( $url ) . '_ts';

		if ( $mtime = get_option($cache_timestamp) ) {
			// find how long ago the item was added to the cache
			// and whether that is longer than MAX_AGE
			$age = time() - $mtime;
			if ( $this->MAX_AGE > $age ) {
				// object exists and is current
				return 'HIT';
			}
			else {
				// object exists but is old
				return 'STALE';
			}
		}
		else {
			// object does not exist
			return 'MISS';
		}
	}

/*=======================================================================*\
	Function:	serialize
	Purpose:	turn an object into its PHP serialized string form
\*=======================================================================*/
	function serialize ( $rss ) {
		return serialize( $rss );
	}

/*=======================================================================*\
	Function:	unserialize
	Purpose:	rebuild a value from its PHP serialized string form
	NOTE(review):	unserialize() must only ever see trusted data; here it
			is fed by get(), which reads the options table
\*=======================================================================*/
	function unserialize ( $data ) {
		return unserialize( $data );
	}

/*=======================================================================*\
	Function:	file_name
	Purpose:	map url to its cache key
	Input:		url from which the rss file was fetched
	Output:		the md5 hex digest of the url
\*=======================================================================*/
	function file_name ($url) {
		return md5( $url );
	}

/*=======================================================================*\
	Function:	error
	Purpose:	register error: remember it in $this->ERROR, then raise
			it with trigger_error() when MAGPIE_DEBUG is on, or
			send it to error_log() otherwise
\*=======================================================================*/
	function error ($errormsg, $lvl=E_USER_WARNING) {
		// append PHP's error message if track_errors enabled
		// NOTE(review): $php_errormsg is only looked up in this method's
		// local scope, so this branch looks unreachable - confirm
		if ( isset($php_errormsg) ) {
			$errormsg .= " ($php_errormsg)";
		}
		$this->ERROR = $errormsg;
		if ( MAGPIE_DEBUG ) {
			trigger_error( $errormsg, $lvl);
		}
		else {
			error_log( $errormsg, 0);
		}
	}

/*=======================================================================*\
	Function:	debug
	Purpose:	report a debugging message through error(), but only
			when MAGPIE_DEBUG is on
\*=======================================================================*/
	function debug ($debugmsg, $lvl=E_USER_NOTICE) {
		if ( MAGPIE_DEBUG ) {
			$this->error("MagpieRSS [debug] $debugmsg", $lvl);
		}
	}
}
|
---|
1760 |
|
---|
1761 | ################################################################################
|
---|
1762 | ## rss_utils.inc: from MagpieRSS 0.8a ##########################################
|
---|
1763 | ################################################################################
|
---|
1764 |
|
---|
/*======================================================================*\
	Function:	parse_w3cdtf
	Purpose:	parse a W3CDTF date into unix epoch
	Input:		a date string such as 2003, 2003-12, 2003-12-13,
			2003-12-13T18:30Z or 2003-12-13T18:30:02.25+01:00
	Output:		unix timestamp, or -1 if the string is not W3CDTF

	NOTE: http://www.w3.org/TR/NOTE-datetime
	NOTE: fractional seconds are accepted but ignored
\*======================================================================*/

function parse_w3cdtf ( $date_str ) {

	# regex to match wc3dtf
	$pat = '/^\s*(\d{4})(-(\d{2})(-(\d{2})(T(\d{2}):(\d{2})(:(\d{2})(\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?)?)?)?\s*$/';

	if ( !preg_match( $pat, $date_str, $match ) ) {
		return -1;
	}

	# preg_match() leaves trailing unmatched groups out of $match and reports
	# unmatched groups in the middle as ''. Normalize both cases to null so
	# that "missing" can be tested reliably below.
	$part = array();
	foreach ( array(1, 3, 5, 7, 8, 10, 12, 13, 14, 15) as $idx ) {
		$part[$idx] = ( isset($match[$idx]) && $match[$idx] !== '' ) ? $match[$idx] : null;
	}

	$year = (int) $part[1];

	# W3C dates can omit the time, the day of the month, or even the month.
	# Fill in any blanks using information from the present moment. --CWJ
	$month = is_null($part[3]) ? (int) gmdate('m') : (int) $part[3];
	$day   = is_null($part[5]) ? (int) gmdate('d') : (int) $part[5];

	if ( is_null($part[7]) ) {
		# no time at all: current hour, on the hour
		$hours = (int) gmdate('H');
		$minutes = 0;
		$seconds = 0;
	}
	else {
		$hours = (int) $part[7];
		$minutes = (int) $part[8];
		# seconds are optional even when a time is present
		$seconds = is_null($part[10]) ? 0 : (int) $part[10];
	}

	# calc epoch for current date assuming GMT
	$epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year);

	# zulu time (aka GMT) and dates without a zone need no correction
	$offset = 0;
	if ( !is_null($part[12]) ) {
		$offset = (((int) $part[13] * 60) + (int) $part[14]) * 60;

		# is timezone ahead of GMT? then subtract offset
		if ( $part[12] == '+' ) {
			$offset = $offset * -1;
		}
	}

	return $epoch + $offset;
}
|
---|
1823 |
|
---|
1824 | # Relative URI static class: PHP class for resolving relative URLs
|
---|
1825 | #
|
---|
1826 | # This class is derived (under the terms of the GPL) from URL Class 0.3 by
|
---|
1827 | # Keyvan Minoukadeh <keyvan@k1m.com>, which is great but more than we need
|
---|
1828 | # for MagpieRSS's purposes. The class has been stripped down to a single
|
---|
1829 | # public method: Relative_URI::resolve($url, $base), which resolves the URI in
|
---|
1830 | # $url relative to the URI in $base
|
---|
1831 | #
|
---|
1832 | # FeedWordPress also uses this class. So if we have it loaded in, don't load it
|
---|
1833 | # again.
|
---|
1834 | #
|
---|
1835 | # -- Charles Johnson <technophilia@radgeek.com>
|
---|
if (!class_exists('Relative_URI')) {
	// Static helper class: every method is called as Relative_URI::name()
	// and is therefore declared static (PHP 8 refuses to call a non-static
	// method statically).
	class Relative_URI
	{
		// Resolve relative URI in $url against the base URI in $base. If $base
		// is not supplied, then we use the REQUEST_URI of this script.
		//
		// Both arguments are trimmed and passed through _encode() first.
		// Returns the resolved URI as a string.
		//
		// I'm hoping this method reflects RFC 2396 Section 5.2
		public static function resolve ($url, $base = NULL)
		{
			if (is_null($base)) {
				$base = 'http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI'];
			}

			$base = Relative_URI::_encode(trim($base));
			$uri_parts = Relative_URI::_parse_url($base);

			$url = Relative_URI::_encode(trim($url));
			$parts = Relative_URI::_parse_url($url);

			// query and fragment are always taken from the reference
			$uri_parts['fragment'] = (isset($parts['fragment']) ? $parts['fragment'] : null);
			$uri_parts['query'] = (isset($parts['query']) ? $parts['query'] : null);

			// if path is empty, and scheme, host, and query are undefined,
			// the URL is referring the base URL
			if (($parts['path'] == '') && !isset($parts['scheme']) && !isset($parts['host']) && !isset($parts['query'])) {
				// An empty reference is the base URI itself
				if (!isset($parts['fragment'])) {
					return $base;
				}
				// A fragment-only reference replaces any fragment the
				// base already carries instead of stacking a second one
				$hash_pos = strpos($base, '#');
				if ($hash_pos !== false) {
					$base = substr($base, 0, $hash_pos);
				}
				return $base.'#'.$parts['fragment'];
			}

			if (isset($parts['scheme'])) {
				// If the scheme is set, then the URI is absolute.
				return $url;
			}

			if (isset($parts['host'])) {
				// Network-path reference ("//host/path"): the whole authority
				// comes from the reference, so the base's user, password and
				// port must not leak into the result.
				foreach (array('user', 'pass', 'host', 'port') as $key) {
					if (isset($parts[$key])) {
						$uri_parts[$key] = $parts[$key];
					} else {
						unset($uri_parts[$key]);
					}
				}
				$uri_parts['path'] = $parts['path'];
			} else {
				// We have a relative path but not a host.
				$rel_path = $parts['path'];

				// start ugly fix:
				// prepend slash to path if base host is set, base path is not set, and url path is not absolute
				$base_has_host = (isset($uri_parts['host']) && ($uri_parts['host'] !== ''));
				if ($base_has_host && ($uri_parts['path'] == '')
				&& (strlen($rel_path) > 0)
				&& (substr($rel_path, 0, 1) != '/')) {
					$rel_path = '/'.$rel_path;
				} // end ugly fix

				if (substr($rel_path, 0, 1) == '/') {
					// absolute path: replaces the base path outright
					$uri_parts['path'] = $rel_path;
				} else {
					$uri_parts['path'] = Relative_URI::_merge_paths($uri_parts['path'], $rel_path);
				}
			}

			// If we've gotten to this point, we can try to put the pieces
			// back together.
			$ret = '';
			if (isset($uri_parts['scheme'])) $ret .= $uri_parts['scheme'].':';
			if (isset($uri_parts['host'])) {
				$ret .= '//';
				// user information belongs inside the authority, after "//"
				if (isset($uri_parts['user'])) {
					$ret .= $uri_parts['user'];
					if (isset($uri_parts['pass'])) $ret .= ':'.$uri_parts['pass'];
					$ret .= '@';
				}
				$ret .= $uri_parts['host'];
				if (isset($uri_parts['port'])) $ret .= ':'.$uri_parts['port'];
			}
			$ret .= $uri_parts['path'];
			if (isset($uri_parts['query'])) $ret .= '?'.$uri_parts['query'];
			if (isset($uri_parts['fragment'])) $ret .= '#'.$uri_parts['fragment'];

			return $ret;
		}

		/**
		* Merge a relative path into a base path
		*
		* Keeps the base path up to and including its last slash, appends
		* the relative path, then removes "." and "<segment>/.." segments.
		* @param string $base_path path of the base URI
		* @param string $rel_path relative path (not starting with "/")
		* @access private
		* @return string
		*/
		public static function _merge_paths ($base_path, $rel_path)
		{
			// copy base path excluding any characters after the last (right-most) slash character;
			// a base path without any slash contributes nothing
			$last_slash = strrpos($base_path, '/');
			$buffer = ($last_slash === false) ? '' : substr($base_path, 0, $last_slash + 1);
			// append relative path
			$buffer .= $rel_path;
			// remove "./" where "." is a complete path segment.
			$buffer = str_replace('/./', '/', $buffer);
			if (substr($buffer, 0, 2) == './') {
				$buffer = substr($buffer, 2);
			}
			// if buffer ends with "." as a complete path segment, remove it
			if (substr($buffer, -2) == '/.') {
				$buffer = substr($buffer, 0, -1);
			}
			// remove "<segment>/../" where <segment> is a complete path segment not equal to ".."
			$search_finished = false;
			$segment = explode('/', $buffer);
			while (!$search_finished) {
				for ($x=0; $x+1 < count($segment);) {
					if (($segment[$x] != '') && ($segment[$x] != '..') && ($segment[$x+1] == '..')) {
						// keep a trailing slash when the pair is at the very end
						if ($x+2 == count($segment)) $segment[] = '';
						unset($segment[$x], $segment[$x+1]);
						$segment = array_values($segment);
						// restart the scan from the first segment
						continue 2;
					} else {
						$x++;
					}
				}
				$search_finished = true;
			}
			return (count($segment) == 1) ? '/' : implode('/', $segment);
		}

		/**
		* Parse URL
		*
		* Regular expression grabbed from RFC 2396 Appendix B.
		* This is a replacement for PHPs builtin parse_url().
		* The result always has a 'path' key; 'scheme', 'user', 'pass',
		* 'host', 'port', 'query' and 'fragment' appear only when present.
		* @param string $url
		* @access private
		* @return array
		*/
		public static function _parse_url($url)
		{
			// I'm using this pattern instead of parse_url() as there's a few strings where parse_url()
			// generates a warning.
			if (preg_match('!^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?!', $url, $match)) {
				$parts = array();
				if ($match[1] != '') $parts['scheme'] = $match[2];
				if ($match[3] != '') $parts['auth'] = $match[4];
				// parse auth
				if (isset($parts['auth'])) {
					// store user info
					if (($at_pos = strpos($parts['auth'], '@')) !== false) {
						$userinfo = explode(':', substr($parts['auth'], 0, $at_pos), 2);
						$parts['user'] = $userinfo[0];
						if (isset($userinfo[1])) $parts['pass'] = $userinfo[1];
						$parts['auth'] = substr($parts['auth'], $at_pos+1);
					}
					// get port number
					if ($port_pos = strrpos($parts['auth'], ':')) {
						$parts['host'] = substr($parts['auth'], 0, $port_pos);
						$parts['port'] = (int)substr($parts['auth'], $port_pos+1);
						// a port below 1 counts as "no port"
						if ($parts['port'] < 1) $parts['port'] = null;
					} else {
						$parts['host'] = $parts['auth'];
					}
				}
				unset($parts['auth']);
				$parts['path'] = $match[5];
				if (isset($match[6]) && ($match[6] != '')) $parts['query'] = $match[7];
				if (isset($match[8]) && ($match[8] != '')) $parts['fragment'] = $match[9];
				return $parts;
			}
			// shouldn't reach here
			return array('path'=>'');
		}

		/**
		* Encode URL
		*
		* Percent-encodes control characters, the space and a few other
		* characters, then drops whatever is still outside 0x21 - 0x7E.
		* @param string $string
		* @access private
		* @return string
		*/
		public static function _encode($string)
		{
			// the translation table is built once and reused on later calls
			static $replace = array();
			if (!count($replace)) {
				$find = array(32, 34, 60, 62, 123, 124, 125, 91, 92, 93, 94, 96, 127);
				$find = array_merge(range(0, 31), $find);
				$find = array_map('chr', $find);
				foreach ($find as $char) {
					$replace[$char] = '%'.bin2hex($char);
				}
			}
			// escape control characters and a few other characters
			$encoded = strtr($string, $replace);
			// remove any character outside the hex range: 21 - 7E (see www.asciitable.com)
			return preg_replace('/[^\x21-\x7e]/', '', $encoded);
		}
	} // class Relative_URI
}
|
---|
2002 |
|
---|
2003 | ################################################################################
|
---|
2004 | ## WordPress: wp_rss(), get_rss() ##############################################
|
---|
2005 | ################################################################################
|
---|
2006 |
|
---|
/*=======================================================================*\
	Function:	wp_rss
	Purpose:	fetch a feed and print its first items as an HTML list
	Input:		$url - address of the feed, handed to fetch_rss()
			$num - maximum number of items to print
	Output:		nothing is returned; prints a <ul> of links, or an error
			message when the feed could not be fetched
\*=======================================================================*/
function wp_rss ($url, $num) {
	//ini_set("display_errors", false); uncomment to suppress php errors thrown if the feed is not returned.
	$rss = fetch_rss($url);
	if ( !$rss ) {
		echo "an error has occured the feed is probably down, try again later.";
		return;
	}

	echo "<ul>";
	$rss->items = array_slice($rss->items, 0, $num);
	foreach ($rss->items as $item ) {
		// not every item carries all three fields
		$link = isset($item['link']) ? (string) $item['link'] : '';
		$description = isset($item['description']) ? (string) $item['description'] : '';
		$title = isset($item['title']) ? (string) $item['title'] : '';

		echo "<li>\n";
		// feed content is untrusted: escape it (quotes included) before it
		// goes into the single-quoted attributes
		echo "<a href='" . htmlspecialchars($link, ENT_QUOTES) . "' title='" . htmlspecialchars($description, ENT_QUOTES) . "'>";
		echo htmlentities($title);
		echo "</a><br />\n";
		echo "</li>\n";
	}
	echo "</ul>";
}
|
---|
2027 |
|
---|
/*=======================================================================*\
	Function:	get_rss
	Purpose:	Like get posts, but for RSS: fetch a feed and print its
			first items as <li> elements (no surrounding list tag)
	Input:		$uri - address of the feed, handed to fetch_rss()
			$num - maximum number of items to print, default 5
	Output:		false when the feed could not be fetched, null otherwise
\*=======================================================================*/
function get_rss ($uri, $num = 5) {
	// the parameters are $uri and $num; the body used to read the undefined
	// variables $url and $num_items, so the requested feed was never fetched
	$rss = fetch_rss($uri);
	if ( !$rss ) {
		return false;
	}

	$rss->items = array_slice($rss->items, 0, $num);
	foreach ($rss->items as $item ) {
		// not every item carries all three fields
		$link = isset($item['link']) ? (string) $item['link'] : '';
		$description = isset($item['description']) ? (string) $item['description'] : '';
		$title = isset($item['title']) ? (string) $item['title'] : '';

		echo "<li>\n";
		// feed content is untrusted: escape it (quotes included) before it
		// goes into the single-quoted attributes
		echo "<a href='" . htmlspecialchars($link, ENT_QUOTES) . "' title='" . htmlspecialchars($description, ENT_QUOTES) . "'>";
		echo htmlentities($title);
		echo "</a><br />\n";
		echo "</li>\n";
	}

	// the old "return $posts" read an undefined variable and so yielded
	// null; keep that value for callers, minus the warning
	return null;
}
|
---|
2044 | ?>
|
---|