* WordPress development team
* Charles Johnson
* Version: 2009.0618
* License: GPL
*
* Provenance:
*
* This is a drop-in replacement for the `rss-functions.php` provided with the
* WordPress 1.5 distribution, which upgrades the version of MagpieRSS from 0.51
* to 0.8a. The update improves handling of character encoding, supports
* multiple categories for posts (using or ), supports
* Atom 1.0, and implements many other useful features. The file is derived from
* a combination of (1) the WordPress development team's modifications to
* MagpieRSS 0.51 and (2) the latest bleeding-edge updates to the "official"
* MagpieRSS software, including Kellan's original work and some substantial
* updates by Charles Johnson. All possible through the magic of the GPL. Yay
* for free software!
*
* Differences from the main branch of MagpieRSS:
*
* 1. Everything in rss_parse.inc, rss_fetch.inc, rss_cache.inc, and
* rss_utils.inc is included in one file.
*
* 2. MagpieRSS returns the WordPress version as the user agent, rather than
* Magpie
*
* 3. class RSSCache is a modified version by WordPress developers, which
* caches feeds in the WordPress database (in the options table), rather
* than writing external files directly.
*
* 4. There are two WordPress-specific functions, get_rss() and wp_rss()
*
* Differences from the version of MagpieRSS packaged with WordPress:
*
* 1. Support for translation between multiple character encodings. Under
* PHP 5 this is very nicely handled by the XML parsing library. Under PHP
* 4 we need to do a little bit of work ourselves, using either iconv or
* mb_convert_encoding if it is not one of the (extremely limited) number
* of character sets that PHP 4's XML module can handle natively.
*
* 2. Numerous bug fixes.
*
* 3. The parser class MagpieRSS has been substantially revised to better
* support popular features such as enclosures and multiple categories,
* and to support the new Atom 1.0 IETF standard. (Atom feeds are
* normalized so as to make the data available using terminology from
* either Atom 0.3 or Atom 1.0. Atom 0.3 backward-compatibility is provided
* to allow existing software to easily begin accepting Atom 1.0 data; new
* software SHOULD NOT depend on the 0.3 terminology, but rather use the
* normalization as a convenient way to keep supporting 0.3 feeds while
* they linger in the world.)
*
* The upgraded MagpieRSS can also now handle some content constructs that
* had not been handled well by previous versions of Magpie (such as the
* use of namespaced XHTML in or elements to
* provide the full content of posts in RSS 2.0 feeds).
*
* Unlike previous versions of MagpieRSS, this version can parse multiple
* instances of the same child element in item/entry and channel/feed
* containers. This is done using simple counters next to the element
* names: the first element on an RSS item, for example, can be
* found in $item['category'] (thus preserving backward compatibility); the
* second in $item['category#2'], the third in $item['category#3'], and so
* on. The number of categories applied to the item can be found in
* $item['category#']
*
* Also unlike previous versions of MagpieRSS, this version allows you to
* access the values of elements' attributes as well as the content they
* contain. This can be done using a simple syntax inspired by XPath: to
* access the type attribute of an RSS 2.0 enclosure, for example, you
* need only access `$item['enclosure@type']`. A comma-separated list of
* attributes for the enclosure element is stored in `$item['enclosure@']`.
* (This syntax interacts easily with the syntax for multiple categories;
* for example, the value of the `scheme` attribute for the fourth category
* element on a particular item is stored in `$item['category#4@scheme']`.)
*
* Note also that this implementation IS NOT backward-compatible with the
* kludges that were used to hack in support for multiple categories and
* for enclosures in upgraded versions of MagpieRSS distributed with
* previous versions of FeedWordPress. If your hacks or filter plugins
* depended on the old way of doing things... well, I warned you that they
* might not be permanent. Sorry!
*/
define('RSS', 'RSS');
define('ATOM', 'Atom');
################################################################################
## WordPress: make some settings WordPress-appropriate #########################
################################################################################
define('MAGPIE_USER_AGENT', 'WordPress/' . $wp_version . '(+http://www.wordpress.org)');
$wp_encoding = get_settings('blog_charset');
define('MAGPIE_OUTPUT_ENCODING', ($wp_encoding?$wp_encoding:'ISO-8859-1'));
################################################################################
## rss_parse.inc: from MagpieRSS 0.85 ##########################################
################################################################################
/**
* Hybrid parser, and object, takes RSS as a string and returns a simple object.
*
* see: rss_fetch.inc for a simpler interface with integrated caching support
*
*/
class MagpieRSS {
var $parser;
var $current_item = array(); // item currently being parsed
var $items = array(); // collection of parsed items
var $channel = array(); // hash of channel fields
var $textinput = array();
var $image = array();
var $feed_type;
var $feed_version;
var $encoding = ''; // output encoding of parsed rss
var $_source_encoding = ''; // only set if we have to parse xml prolog
var $ERROR = "";
var $WARNING = "";
// define some constants
var $_XMLNS_FAMILIAR = array (
'http://www.w3.org/2005/Atom' => 'atom' /* 1.0 */,
'http://purl.org/atom/ns#' => 'atom' /* pre-1.0 */,
'http://purl.org/rss/1.0/' => 'rss' /* 1.0 */,
'http://backend.userland.com/RSS2' => 'rss' /* 2.0 */,
'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf',
'http://www.w3.org/1999/xhtml' => 'xhtml',
'http://purl.org/dc/elements/1.1/' => 'dc',
'http://purl.org/dc/terms/' => 'dcterms',
'http://purl.org/rss/1.0/modules/content/' => 'content',
'http://purl.org/rss/1.0/modules/syndication/' => 'sy',
'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
'http://purl.org/rss/1.0/modules/dc/' => 'dc',
'http://wellformedweb.org/CommentAPI/' => 'wfw',
'http://webns.net/mvcb/' => 'admin',
'http://purl.org/rss/1.0/modules/annotate/' => 'annotate',
'http://xmlns.com/foaf/0.1/' => 'foaf',
'http://madskills.com/public/xml/rss/module/trackback/' => 'trackback',
'http://web.resource.org/cc/' => 'cc',
'http://search.yahoo.com/mrss' => 'media',
);
var $_XMLBASE_RESOLVE = array (
// Atom 0.3 and 1.0 xml:base support
'atom' => array (
'link' => array ('href' => true),
'content' => array ('src' => true, '*xml' => true, '*html' => true),
'summary' => array ('*xml' => true, '*html' => true),
'title' => array ('*xml' => true, '*html' => true),
'rights' => array ('*xml' => true, '*html' => true),
'subtitle' => array ('*xml' => true, '*html' => true),
'info' => array('*xml' => true, '*html' => true),
'tagline' => array('*xml' => true, '*html' => true),
'copyright' => array ('*xml' => true, '*html' => true),
'generator' => array ('uri' => true, 'url' => true),
'uri' => array ('*content' => true),
'url' => array ('*content' => true),
'icon' => array ('*content' => true),
'logo' => array ('*content' => true),
),
// for inline namespaced XHTML
'xhtml' => array (
'a' => array ('href' => true),
'applet' => array('codebase' => true),
'area' => array('href' => true),
'blockquote' => array('cite' => true),
'body' => array('background' => true),
'del' => array('cite' => true),
'form' => array('action' => true),
'frame' => array('longdesc' => true, 'src' => true),
'iframe' => array('longdesc' => true, 'iframe' => true, 'src' => true),
'head' => array('profile' => true),
'img' => array('longdesc' => true, 'src' => true, 'usemap' => true),
'input' => array('src' => true, 'usemap' => true),
'ins' => array('cite' => true),
'link' => array('href' => true),
'object' => array('classid' => true, 'codebase' => true, 'data' => true, 'usemap' => true),
'q' => array('cite' => true),
'script' => array('src' => true),
),
);
var $_ATOM_CONTENT_CONSTRUCTS = array(
'content', 'summary', 'title', /* common */
'info', 'tagline', 'copyright', /* Atom 0.3 */
'rights', 'subtitle', /* Atom 1.0 */
);
var $_XHTML_CONTENT_CONSTRUCTS = array('body', 'div');
var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');
// parser variables, useless if you're not a parser, treat as private
var $stack = array('element' => array (), 'ns' => array (), 'xmlns' => array (), 'xml:base' => array ()); // stack of XML data
var $inchannel = false;
var $initem = false;
var $incontent = array(); // non-empty if in namespaced XML content field
var $xml_escape = false; // true when accepting namespaced XML
var $exclude_top = false; // true when Atom 1.0 type="xhtml"
var $intextinput = false;
var $inimage = false;
var $root_namespaces = array();
var $current_namespace = false;
var $working_namespace_table = array();
/**
* Set up XML parser, parse source, and return populated RSS object..
*
* @param string $source string containing the RSS to be parsed
*
* NOTE: Probably a good idea to leave the encoding options alone unless
* you know what you're doing as PHP's character set support is
* a little weird.
*
* NOTE: A lot of this is unnecessary but harmless with PHP5
*
*
* @param string $output_encoding output the parsed RSS in this character
* set defaults to ISO-8859-1 as this is PHP's
* default.
*
* NOTE: might be changed to UTF-8 in future
* versions.
*
* @param string $input_encoding the character set of the incoming RSS source.
* Leave blank and Magpie will try to figure it
* out.
*
*
* @param bool $detect_encoding if false Magpie won't attempt to detect
* source encoding. (caveat emptor)
*
*/
function MagpieRSS ($source, $output_encoding='ISO-8859-1',
$input_encoding=null, $detect_encoding=true, $base_uri=null)
{
# if PHP xml isn't compiled in, die
#
if (!function_exists('xml_parser_create')) {
$this->error( "Failed to load PHP's XML Extension. " .
"http://www.php.net/manual/en/ref.xml.php",
E_USER_ERROR );
}
list($parser, $source) = $this->create_parser($source,
$output_encoding, $input_encoding, $detect_encoding);
if (!is_resource($parser)) {
$this->error( "Failed to create an instance of PHP's XML parser. " .
"http://www.php.net/manual/en/ref.xml.php",
E_USER_ERROR );
}
$this->parser = $parser;
# pass in parser, and a reference to this object
# setup handlers
#
xml_set_object( $this->parser, $this );
xml_set_element_handler($this->parser,
'feed_start_element', 'feed_end_element' );
xml_set_character_data_handler( $this->parser, 'feed_cdata' );
$this->stack['xml:base'] = array($base_uri);
$status = xml_parse( $this->parser, $source );
if (! $status ) {
$errorcode = xml_get_error_code( $this->parser );
if ( $errorcode != XML_ERROR_NONE ) {
$xml_error = xml_error_string( $errorcode );
$error_line = xml_get_current_line_number($this->parser);
$error_col = xml_get_current_column_number($this->parser);
$errormsg = "$xml_error at line $error_line, column $error_col";
$this->error( $errormsg );
}
}
xml_parser_free( $this->parser );
$this->normalize();
}
function feed_start_element($p, $element, &$attributes) {
$el = strtolower($element);
$namespaces = end($this->stack['xmlns']);
$baseuri = end($this->stack['xml:base']);
if (isset($attributes['xml:base'])) {
$baseuri = Relative_URI::resolve($attributes['xml:base'], $baseuri);
}
array_push($this->stack['xml:base'], $baseuri);
// scan for xml namespace declarations. ugly ugly ugly.
// theoretically we could use xml_set_start_namespace_decl_handler and
// xml_set_end_namespace_decl_handler to handle this more elegantly, but
// support for these is buggy
foreach ($attributes as $attr => $value) {
if ( preg_match('/^xmlns(\:([A-Z_a-z].*))?$/', $attr, $match) ) {
$ns = (isset($match[2]) ? $match[2] : '');
$namespaces[$ns] = $value;
}
}
array_push($this->stack['xmlns'], $namespaces);
// check for a namespace, and split if found
// Don't munge content tags
$ns = $this->namespace($element);
if ( empty($this->incontent) ) {
$el = strtolower($ns['element']);
$this->current_namespace = $ns['effective'];
array_push($this->stack['ns'], $ns['effective']);
}
$nsc = $ns['canonical']; $nse = $ns['element'];
if ( isset($this->_XMLBASE_RESOLVE[$nsc][$nse]) ) {
if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*xml'])) {
$attributes['xml:base'] = $baseuri;
}
foreach ($attributes as $key => $value) {
if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse][strtolower($key)])) {
$attributes[$key] = Relative_URI::resolve($attributes[$key], $baseuri);
}
}
}
$attrs = array_change_key_case($attributes, CASE_LOWER);
# if feed type isn't set, then this is first element of feed
# identify feed from root element
#
if (!isset($this->feed_type) ) {
if ( $el == 'rdf' ) {
$this->feed_type = RSS;
$this->root_namespaces = array('rss', 'rdf');
$this->feed_version = '1.0';
}
elseif ( $el == 'rss' ) {
$this->feed_type = RSS;
$this->root_namespaces = array('rss');
$this->feed_version = $attrs['version'];
}
elseif ( $el == 'feed' ) {
$this->feed_type = ATOM;
$this->root_namespaces = array('atom');
if ($ns['uri'] == 'http://www.w3.org/2005/Atom') { // Atom 1.0
$this->feed_version = '1.0';
}
else { // Atom 0.3, probably.
$this->feed_version = $attrs['version'];
}
$this->inchannel = true;
}
return;
}
// if we're inside a namespaced content construct, treat tags as text
if ( !empty($this->incontent) )
{
if ((count($this->incontent) > 1) or !$this->exclude_top) {
if ($ns['effective']=='xhtml') {
$tag = $ns['element'];
}
else {
$tag = $element;
$xmlns = 'xmlns';
if (strlen($ns['prefix'])>0) {
$xmlns = $xmlns . ':' . $ns['prefix'];
}
$attributes[$xmlns] = $ns['uri']; // make sure it's visible
}
// if tags are inlined, then flatten
$attrs_str = join(' ',
array_map(array($this, 'map_attrs'),
array_keys($attributes),
array_values($attributes) )
);
if (strlen($attrs_str) > 0) { $attrs_str = ' '.$attrs_str; }
$this->append_content( "<{$tag}{$attrs_str}>" );
}
array_push($this->incontent, $ns); // stack for parsing content XML
}
elseif ( $el == 'channel' ) {
$this->inchannel = true;
}
elseif ($el == 'item' or $el == 'entry' )
{
$this->initem = true;
if ( isset($attrs['rdf:about']) ) {
$this->current_item['about'] = $attrs['rdf:about'];
}
}
// if we're in the default namespace of an RSS feed,
// record textinput or image fields
elseif (
$this->feed_type == RSS and
$this->current_namespace == '' and
$el == 'textinput' )
{
$this->intextinput = true;
}
elseif (
$this->feed_type == RSS and
$this->current_namespace == '' and
$el == 'image' )
{
$this->inimage = true;
}
// set stack[0] to current element
else {
// Atom support many links per containing element.
// Magpie treats link elements of type rel='alternate'
// as being equivalent to RSS's simple link element.
$atom_link = false;
if ( ($ns['canonical']=='atom') and $el == 'link') {
$atom_link = true;
if (isset($attrs['rel']) and $attrs['rel'] != 'alternate') {
$el = $el . "_" . $attrs['rel']; // pseudo-element names for Atom link elements
}
}
# handle atom content constructs
elseif ( ($ns['canonical']=='atom') and in_array($el, $this->_ATOM_CONTENT_CONSTRUCTS) )
{
// avoid clashing w/ RSS mod_content
if ($el == 'content' ) {
$el = 'atom_content';
}
// assume that everything accepts namespaced XML
// (that will pass through some non-validating feeds;
// but so what? this isn't a validating parser)
$this->incontent = array();
array_push($this->incontent, $ns); // start a stack
$this->xml_escape = $this->accepts_namespaced_xml($attrs);
if ( isset($attrs['type']) and trim(strtolower($attrs['type']))=='xhtml') {
$this->exclude_top = true;
} else {
$this->exclude_top = false;
}
}
# Handle inline XHTML body elements --CWJ
elseif ($ns['effective']=='xhtml' and in_array($el, $this->_XHTML_CONTENT_CONSTRUCTS)) {
$this->current_namespace = 'xhtml';
$this->incontent = array();
array_push($this->incontent, $ns); // start a stack
$this->xml_escape = true;
$this->exclude_top = false;
}
array_unshift($this->stack['element'], $el);
$elpath = join('_', array_reverse($this->stack['element']));
$n = $this->element_count($elpath);
$this->element_count($elpath, $n+1);
if ($n > 0) {
array_shift($this->stack['element']);
array_unshift($this->stack['element'], $el.'#'.($n+1));
$elpath = join('_', array_reverse($this->stack['element']));
}
// this makes the baby Jesus cry, but we can't do it in normalize()
// because we've made the element name for Atom links unpredictable
// by tacking on the relation to the end. -CWJ
if ($atom_link and isset($attrs['href'])) {
$this->append($elpath, $attrs['href']);
}
// add attributes
if (count($attrs) > 0) {
$this->append($elpath.'@', join(',', array_keys($attrs)));
foreach ($attrs as $attr => $value) {
$this->append($elpath.'@'.$attr, $value);
}
}
}
}
function feed_cdata ($p, $text) {
if ($this->incontent) {
if ($this->xml_escape) { $text = htmlspecialchars($text, ENT_COMPAT, $this->encoding); }
$this->append_content( $text );
} else {
$current_el = join('_', array_reverse($this->stack['element']));
$this->append($current_el, $text);
}
}
function feed_end_element ($p, $el) {
$closer = $this->namespace($el);
if ( $this->incontent ) {
$opener = array_pop($this->incontent);
// balance tags properly
// note: i don't think this is actually neccessary
if ($opener != $closer) {
array_push($this->incontent, $opener);
$this->append_content("<$el />");
} elseif ($this->incontent) { // are we in the content construct still?
if ((count($this->incontent) > 1) or !$this->exclude_top) {
if ($closer['effective']=='xhtml') {
$tag = $closer['element'];
}
else {
$tag = $el;
}
$this->append_content("$tag>");
}
} else { // if we're done with the content construct, shift the opening of the content construct off the normal stack
array_shift( $this->stack['element'] );
}
}
elseif ($closer['effective'] == '') {
$el = strtolower($closer['element']);
if ( $el == 'item' or $el == 'entry' ) {
$this->items[] = $this->current_item;
$this->current_item = array();
$this->initem = false;
$this->current_category = 0;
}
elseif ($this->feed_type == RSS and $el == 'textinput' ) {
$this->intextinput = false;
}
elseif ($this->feed_type == RSS and $el == 'image' ) {
$this->inimage = false;
}
elseif ($el == 'channel' or $el == 'feed' ) {
$this->inchannel = false;
} else {
$nsc = $closer['canonical']; $nse = $closer['element'];
if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) {
// Resolve relative URI in content of tag
$this->dereference_current_element();
}
array_shift( $this->stack['element'] );
}
} else {
$nsc = $closer['canonical']; $nse = strtolower($closer['element']);
if (isset($this->_XMLBASE_RESOLVE[$nsc][$nse]['*content'])) {
// Resolve relative URI in content of tag
$this->dereference_current_element();
}
array_shift( $this->stack['element'] );
}
if ( !$this->incontent ) { // Don't munge the namespace after finishing with elements in namespaced content constructs -CWJ
$this->current_namespace = array_pop($this->stack['ns']);
}
array_pop($this->stack['xmlns']);
array_pop($this->stack['xml:base']);
}
// Namespace handling functions
function namespace ($element) {
$namespaces = end($this->stack['xmlns']);
$ns = '';
if ( strpos( $element, ':' ) ) {
list($ns, $element) = split( ':', $element, 2);
}
$uri = (isset($namespaces[$ns]) ? $namespaces[$ns] : null);
if (!is_null($uri)) {
$canonical = (
isset($this->_XMLNS_FAMILIAR[$uri])
? $this->_XMLNS_FAMILIAR[$uri]
: $uri
);
} else {
$canonical = $ns;
}
if (in_array($canonical, $this->root_namespaces)) {
$effective = '';
} else {
$effective = $canonical;
}
return array('effective' => $effective, 'canonical' => $canonical, 'prefix' => $ns, 'uri' => $uri, 'element' => $element);
}
// Utility functions for accessing data structure
// for smart, namespace-aware methods...
function magpie_data ($el, $method, $text = NULL) {
$ret = NULL;
if ($el) {
if (is_array($method)) {
$el = $this->{$method['key']}($el);
$method = $method['value'];
}
if ( $this->current_namespace ) {
if ( $this->initem ) {
$ret = $this->{$method} (
$this->current_item[ $this->current_namespace ][ $el ],
$text
);
}
elseif ($this->inchannel) {
$ret = $this->{$method} (
$this->channel[ $this->current_namespace][ $el ],
$text
);
}
elseif ($this->intextinput) {
$ret = $this->{$method} (
$this->textinput[ $this->current_namespace][ $el ],
$text
);
}
elseif ($this->inimage) {
$ret = $this->{$method} (
$this->image[ $this->current_namespace ][ $el ], $text );
}
}
else {
if ( $this->initem ) {
$ret = $this->{$method} (
$this->current_item[ $el ], $text);
}
elseif ($this->intextinput) {
$ret = $this->{$method} (
$this->textinput[ $el ], $text );
}
elseif ($this->inimage) {
$ret = $this->{$method} (
$this->image[ $el ], $text );
}
elseif ($this->inchannel) {
$ret = $this->{$method} (
$this->channel[ $el ], $text );
}
}
}
return $ret;
}
function concat (&$str1, $str2="") {
if (!isset($str1) ) {
$str1="";
}
$str1 .= $str2;
}
function retrieve_value (&$el, $text /*ignore*/) {
return $el;
}
function replace_value (&$el, $text) {
$el = $text;
}
function counter_key ($el) {
return $el.'#';
}
function append_content($text) {
$construct = reset($this->incontent);
$ns = $construct['effective'];
// Keeping data about parent elements is necessary to
// properly handle atom:source and its children elements
$tag = join('_', array_reverse($this->stack['element']));
if ( $this->initem ) {
if ($ns) {
$this->concat( $this->current_item[$ns][$tag], $text );
} else {
$this->concat( $this->current_item[$tag], $text );
}
}
elseif ( $this->inchannel ) {
if ($this->current_namespace) {
$this->concat( $this->channel[$ns][$tag], $text );
} else {
$this->concat( $this->channel[$tag], $text );
}
}
}
// smart append - field and namespace aware
function append($el, $text) {
$this->magpie_data($el, 'concat', $text);
}
function dereference_current_element () {
$el = join('_', array_reverse($this->stack['element']));
$base = end($this->stack['xml:base']);
$uri = $this->magpie_data($el, 'retrieve_value');
$this->magpie_data($el, 'replace_value', Relative_URI::resolve($uri, $base));
}
// smart count - field and namespace aware
function element_count ($el, $set = NULL) {
if (!is_null($set)) {
$ret = $this->magpie_data($el, array('key' => 'counter_key', 'value' => 'replace_value'), $set);
}
$ret = $this->magpie_data($el, array('key' => 'counter_key', 'value' => 'retrieve_value'));
return ($ret ? $ret : 0);
}
function normalize_enclosure (&$source, $from, &$dest, $to, $i) {
$id_from = $this->element_id($from, $i);
$id_to = $this->element_id($to, $i);
if (isset($source["{$id_from}@"])) {
foreach (explode(',', $source["{$id_from}@"]) as $attr) {
if ($from=='link_enclosure' and $attr=='href') { // from Atom
$dest["{$id_to}@url"] = $source["{$id_from}@{$attr}"];
$dest["{$id_to}"] = $source["{$id_from}@{$attr}"];
}
elseif ($from=='enclosure' and $attr=='url') { // from RSS
$dest["{$id_to}@href"] = $source["{$id_from}@{$attr}"];
$dest["{$id_to}"] = $source["{$id_from}@{$attr}"];
}
else {
$dest["{$id_to}@{$attr}"] = $source["{$id_from}@{$attr}"];
}
}
}
}
function normalize_atom_person (&$source, $person, &$dest, $to, $i) {
$id = $this->element_id($person, $i);
$id_to = $this->element_id($to, $i);
// Atom 0.3 <=> Atom 1.0
if ($this->feed_version >= 1.0) { $used = 'uri'; $norm = 'url'; }
else { $used = 'url'; $norm = 'uri'; }
if (isset($source["{$id}_{$used}"])) {
$dest["{$id_to}_{$norm}"] = $source["{$id}_{$used}"];
}
// Atom to RSS 2.0 and Dublin Core
// RSS 2.0 person strings should be valid e-mail addresses if possible.
if (isset($source["{$id}_email"])) {
$rss_author = $source["{$id}_email"];
}
if (isset($source["{$id}_name"])) {
$rss_author = $source["{$id}_name"]
. (isset($rss_author) ? " <$rss_author>" : '');
}
if (isset($rss_author)) {
$source[$id] = $rss_author; // goes to top-level author or contributor
$dest[$id_to] = $rss_author; // goes to dc:creator or dc:contributor
}
}
// Normalize Atom 1.0 and RSS 2.0 categories to Dublin Core...
function normalize_category (&$source, $from, &$dest, $to, $i) {
$cat_id = $this->element_id($from, $i);
$dc_id = $this->element_id($to, $i);
// first normalize category elements: Atom 1.0 <=> RSS 2.0
if ( isset($source["{$cat_id}@term"]) ) { // category identifier
$source[$cat_id] = $source["{$cat_id}@term"];
} elseif ( $this->feed_type == RSS ) {
$source["{$cat_id}@term"] = $source[$cat_id];
}
if ( isset($source["{$cat_id}@scheme"]) ) { // URI to taxonomy
$source["{$cat_id}@domain"] = $source["{$cat_id}@scheme"];
} elseif ( isset($source["{$cat_id}@domain"]) ) {
$source["{$cat_id}@scheme"] = $source["{$cat_id}@domain"];
}
// Now put the identifier into dc:subject
$dest[$dc_id] = $source[$cat_id];
}
// ... or vice versa
function normalize_dc_subject (&$source, $from, &$dest, $to, $i) {
$dc_id = $this->element_id($from, $i);
$cat_id = $this->element_id($to, $i);
$dest[$cat_id] = $source[$dc_id]; // RSS 2.0
$dest["{$cat_id}@term"] = $source[$dc_id]; // Atom 1.0
}
// simplify the logic for normalize(). Makes sure that count of elements and
// each of multiple elements is normalized properly. If you need to mess
// with things like attributes or change formats or the like, pass it a
// callback to handle each element.
function normalize_element (&$source, $from, &$dest, $to, $via = NULL) {
if (isset($source[$from]) or isset($source["{$from}#"])) {
if (isset($source["{$from}#"])) {
$n = $source["{$from}#"];
$dest["{$to}#"] = $source["{$from}#"];
}
else { $n = 1; }
for ($i = 1; $i <= $n; $i++) {
if (isset($via)) { // custom callback for ninja attacks
$this->{$via}($source, $from, $dest, $to, $i);
}
else { // just make it the same
$from_id = $this->element_id($from, $i);
$to_id = $this->element_id($to, $i);
$dest[$to_id] = $source[$from_id];
}
}
}
}
function normalize () {
// if atom populate rss fields and normalize 0.3 and 1.0 feeds
if ( $this->is_atom() ) {
// Atom 1.0 elements <=> Atom 0.3 elements (Thanks, o brilliant wordsmiths of the Atom 1.0 standard!)
if ($this->feed_version < 1.0) {
$this->normalize_element($this->channel, 'tagline', $this->channel, 'subtitle');
$this->normalize_element($this->channel, 'copyright', $this->channel, 'rights');
$this->normalize_element($this->channel, 'modified', $this->channel, 'updated');
} else {
$this->normalize_element($this->channel, 'subtitle', $this->channel, 'tagline');
$this->normalize_element($this->channel, 'rights', $this->channel, 'copyright');
$this->normalize_element($this->channel, 'updated', $this->channel, 'modified');
}
$this->normalize_element($this->channel, 'author', $this->channel['dc'], 'creator', 'normalize_atom_person');
$this->normalize_element($this->channel, 'contributor', $this->channel['dc'], 'contributor', 'normalize_atom_person');
// Atom elements to RSS elements
$this->normalize_element($this->channel, 'subtitle', $this->channel, 'description');
if ( isset($this->channel['logo']) ) {
$this->normalize_element($this->channel, 'logo', $this->image, 'url');
$this->normalize_element($this->channel, 'link', $this->image, 'link');
$this->normalize_element($this->channel, 'title', $this->image, 'title');
}
for ( $i = 0; $i < count($this->items); $i++) {
$item = $this->items[$i];
// Atom 1.0 elements <=> Atom 0.3 elements
if ($this->feed_version < 1.0) {
$this->normalize_element($item, 'modified', $item, 'updated');
$this->normalize_element($item, 'issued', $item, 'published');
} else {
$this->normalize_element($item, 'updated', $item, 'modified');
$this->normalize_element($item, 'published', $item, 'issued');
}
// "If an atom:entry element does not contain
// atom:author elements, then the atom:author elements
// of the contained atom:source element are considered
// to apply. In an Atom Feed Document, the atom:author
// elements of the containing atom:feed element are
// considered to apply to the entry if there are no
// atom:author elements in the locations described
// above."
if (!isset($item["author#"])) {
if (isset($item["source_author#"])) { // from aggregation source
$source = $item;
$author = "source_author";
} elseif (isset($this->channel["author#"])) { // from containing feed
$source = $this->channel;
$author = "author";
} else {
$author = null;
}
if (!is_null($author)) {
$item["author#"] = $source["{$author}#"];
for ($au = 1; $au <= $item["author#"]; $au++) {
$id_to = $this->element_id('author', $au);
$id_from = $this->element_id($author, $au);
$item[$id_to] = $source[$id_from];
foreach (array('name', 'email', 'uri', 'url') as $what) {
if (isset($source["{$id_from}_{$what}"])) {
$item["{$id_to}_{$what}"] = $source["{$id_from}_{$what}"];
}
}
}
}
}
// Atom elements to RSS elements
$this->normalize_element($item, 'author', $item['dc'], 'creator', 'normalize_atom_person');
$this->normalize_element($item, 'contributor', $item['dc'], 'contributor', 'normalize_atom_person');
$this->normalize_element($item, 'summary', $item, 'description');
$this->normalize_element($item, 'atom_content', $item['content'], 'encoded');
$this->normalize_element($item, 'link_enclosure', $item, 'enclosure', 'normalize_enclosure');
// Categories
if ( isset($item['category#']) ) { // Atom 1.0 categories to dc:subject and RSS 2.0 categories
$this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
}
elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories
$this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
}
// Normalized item timestamp
$atom_date = (isset($item['published']) ) ? $item['published'] : $item['updated'];
if ( $atom_date ) {
$epoch = @parse_w3cdtf($atom_date);
if ($epoch and $epoch > 0) {
$item['date_timestamp'] = $epoch;
}
}
$this->items[$i] = $item;
}
}
elseif ( $this->is_rss() ) {
// RSS elements to Atom elements
$this->normalize_element($this->channel, 'description', $this->channel, 'tagline'); // Atom 0.3
$this->normalize_element($this->channel, 'description', $this->channel, 'subtitle'); // Atom 1.0 (yay wordsmithing!)
$this->normalize_element($this->image, 'url', $this->channel, 'logo');
for ( $i = 0; $i < count($this->items); $i++) {
$item = $this->items[$i];
// RSS elements to Atom elements
$this->normalize_element($item, 'description', $item, 'summary');
$this->normalize_element($item, 'enclosure', $item, 'link_enclosure', 'normalize_enclosure');
// Categories
if ( isset($item['category#']) ) { // RSS 2.0 categories to dc:subject and Atom 1.0 categories
$this->normalize_element($item, 'category', $item['dc'], 'subject', 'normalize_category');
}
elseif ( isset($item['dc']['subject#']) ) { // dc:subject to Atom 1.0 and RSS 2.0 categories
$this->normalize_element($item['dc'], 'subject', $item, 'category', 'normalize_dc_subject');
}
// Normalized item timestamp
if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
$epoch = @parse_w3cdtf($item['dc']['date']);
if ($epoch and $epoch > 0) {
$item['date_timestamp'] = $epoch;
}
}
elseif ( isset($item['pubdate']) ) {
$epoch = @strtotime($item['pubdate']);
if ($epoch > 0) {
$item['date_timestamp'] = $epoch;
}
}
$this->items[$i] = $item;
}
}
}
function is_rss () {
if ( $this->feed_type == RSS ) {
return $this->feed_version;
}
else {
return false;
}
}
function is_atom() {
if ( $this->feed_type == ATOM ) {
return $this->feed_version;
}
else {
return false;
}
}
/**
* return XML parser, and possibly re-encoded source
*
*/
function create_parser($source, $out_enc, $in_enc, $detect) {
if ( substr(phpversion(),0,1) == 5) {
$parser = $this->php5_create_parser($in_enc, $detect);
}
else {
list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
}
if ($out_enc) {
$this->encoding = $out_enc;
xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
}
xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);
return array($parser, $source);
}
/**
* Instantiate an XML parser under PHP5
*
* PHP5 will do a fine job of detecting input encoding
* if passed an empty string as the encoding.
*
* All hail libxml2!
*
*/
function php5_create_parser($in_enc, $detect) {
// by default php5 does a fine job of detecting input encodings
if(!$detect && $in_enc) {
return xml_parser_create($in_enc);
}
else {
return xml_parser_create('');
}
}
/**
* Instaniate an XML parser under PHP4
*
* Unfortunately PHP4's support for character encodings
* and especially XML and character encodings sucks. As
* long as the documents you parse only contain characters
* from the ISO-8859-1 character set (a superset of ASCII,
* and a subset of UTF-8) you're fine. However once you
* step out of that comfy little world things get mad, bad,
* and dangerous to know.
*
* The following code is based on SJM's work with FoF
* @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
*
*/
function php4_create_parser($source, $in_enc, $detect) {
if ( !$detect ) {
return array(xml_parser_create($in_enc), $source);
}
if (!$in_enc) {
if (preg_match('//m', $source, $m)) {
$in_enc = strtoupper($m[1]);
$this->source_encoding = $in_enc;
}
else {
$in_enc = 'UTF-8';
}
}
if ($this->known_encoding($in_enc)) {
return array(xml_parser_create($in_enc), $source);
}
// the dectected encoding is not one of the simple encodings PHP knows
// attempt to use the iconv extension to
// cast the XML to a known encoding
// @see http://php.net/iconv
if (function_exists('iconv')) {
$encoded_source = iconv($in_enc,'UTF-8', $source);
if ($encoded_source) {
return array(xml_parser_create('UTF-8'), $encoded_source);
}
}
// iconv didn't work, try mb_convert_encoding
// @see http://php.net/mbstring
if(function_exists('mb_convert_encoding')) {
$encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
if ($encoded_source) {
return array(xml_parser_create('UTF-8'), $encoded_source);
}
}
// else
$this->error("Feed is in an unsupported character encoding. ($in_enc) " .
"You may see strange artifacts, and mangled characters.",
E_USER_NOTICE);
return array(xml_parser_create(), $source);
}
function known_encoding($enc) {
$enc = strtoupper($enc);
if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) {
return $enc;
}
else {
return false;
}
}
function error ($errormsg, $lvl=E_USER_WARNING) {
// append PHP's error message if track_errors enabled
if ( isset($php_errormsg) ) {
$errormsg .= " ($php_errormsg)";
}
if ( MAGPIE_DEBUG ) {
trigger_error( $errormsg, $lvl);
}
else {
error_log( $errormsg, 0);
}
$notices = E_USER_NOTICE|E_NOTICE;
if ( $lvl&$notices ) {
$this->WARNING = $errormsg;
} else {
$this->ERROR = $errormsg;
}
}
// magic ID function for multiple elemenets.
// can be called as static MagpieRSS::element_id()
function element_id ($el, $counter) {
return $el . (($counter > 1) ? '#'.$counter : '');
}
function map_attrs($k, $v) {
return $k.'="'.htmlspecialchars($v, ENT_COMPAT, $this->encoding).'"';
}
function accepts_namespaced_xml ($attrs) {
$mode = (isset($attrs['mode']) ? trim(strtolower($attrs['mode'])) : 'xml');
$type = (isset($attrs['type']) ? trim(strtolower($attrs['type'])) : null);
if ($this->feed_type == ATOM and $this->feed_version < 1.0) {
if ($mode=='xml' and preg_match(':[/+](html|xml)$:i', $type)) {
$ret = true;
} else {
$ret = false;
}
} elseif ($this->feed_type == ATOM and $this->feed_version >= 1.0) {
if ($type=='xhtml' or preg_match(':[/+]xml$:i', $type)) {
$ret = true;
} else {
$ret = false;
}
} else {
$ret = false; // Don't munge unless you're sure
}
return $ret;
}
} // end class RSS
// patch to support medieval versions of PHP4.1.x,
// courtesy, Ryan Currie, ryan@digibliss.com
if (!function_exists('array_change_key_case')) {
define("CASE_UPPER",1);
define("CASE_LOWER",0);
function array_change_key_case($array,$case=CASE_LOWER) {
if ($case==CASE_LOWER) $cmd='strtolower';
elseif ($case==CASE_UPPER) $cmd='strtoupper';
foreach($array as $key=>$value) {
$output[$cmd($key)]=$value;
}
return $output;
}
}
################################################################################
## WordPress: Load in Snoopy from wp-includes ##################################
################################################################################
if (!function_exists('wp_remote_request')) :
require_once( dirname(__FILE__) . '/class-snoopy.php');
endif;
################################################################################
## rss_fetch.inc: from MagpieRSS 0.8a ##########################################
################################################################################
/*=======================================================================*\
Function: fetch_rss:
Purpose: return RSS object for the give url
maintain the cache
Input: url of RSS file
Output: parsed RSS object (see rss_parse.inc)
NOTES ON CACHEING:
If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
NOTES ON RETRIEVING REMOTE FILES:
If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
return a cached object, and touch the cache object upon recieving a
304.
NOTES ON FAILED REQUESTS:
If there is an HTTP error while fetching an RSS object, the cached
version will be return, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
\*=======================================================================*/
define('MAGPIE_VERSION', '2009.0618');
$MAGPIE_ERROR = "";
function fetch_rss ($url) {
// initialize constants
init();
if ( !isset($url) ) {
error("fetch_rss called without a url");
return false;
}
// if cache is disabled
if ( !MAGPIE_CACHE_ON ) {
// fetch file, and parse it
$resp = _fetch_remote_file( $url );
if ( is_success( $resp->status ) ) {
return _response_to_rss( $resp, $url );
}
else {
error("Failed to fetch $url and cache is off");
return false;
}
}
// else cache is ON
else {
// Flow
// 1. check cache
// 2. if there is a hit, make sure its fresh
// 3. if cached obj fails freshness check, fetch remote
// 4. if remote fails, return stale object, or error
$cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );
if (MAGPIE_DEBUG and $cache->ERROR) {
debug($cache->ERROR, E_USER_WARNING);
}
$cache_status = 0; // response of check_cache
$request_headers = array(); // HTTP headers to send with fetch
$rss = 0; // parsed RSS object
$errormsg = 0; // errors, if any
// store parsed XML by desired output encoding
// as character munging happens at parse time
$cache_key = $url . MAGPIE_OUTPUT_ENCODING;
if (!$cache->ERROR) {
// return cache HIT, MISS, or STALE
$cache_status = $cache->check_cache( $cache_key);
}
// if object cached, and cache is fresh, return cached obj
if ( $cache_status == 'HIT' ) {
$rss = $cache->get( $cache_key );
if ( isset($rss) and $rss ) {
// should be cache age
$rss->from_cache = 1;
if ( MAGPIE_DEBUG > 1) {
debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
}
return $rss;
}
}
// else attempt a conditional get
// setup headers
if ( $cache_status == 'STALE' ) {
$rss = $cache->get( $cache_key );
if ( $rss and isset($rss->etag) and $rss->last_modified ) {
$request_headers['If-None-Match'] = $rss->etag;
$request_headers['If-Last-Modified'] = $rss->last_modified;
}
}
$resp = _fetch_remote_file( $url, $request_headers );
if (isset($resp) and $resp) {
if ($resp->status == '304' ) {
// we have the most current copy
if ( MAGPIE_DEBUG > 1) {
debug("Got 304 for $url");
}
// reset cache on 304 (at minutillo insistent prodding)
$cache->set($cache_key, $rss);
return $rss;
}
elseif ( is_success( $resp->status ) ) {
$rss = _response_to_rss( $resp, $url );
if ( $rss ) {
if (MAGPIE_DEBUG > 1) {
debug("Fetch successful");
}
// add object to cache
$cache->set( $cache_key, $rss );
return $rss;
}
}
else {
$errormsg = "Failed to fetch $url ";
if ( $resp->status == '-100' ) {
$errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
}
elseif ( $resp->error ) {
# compensate for Snoopy's annoying habbit to tacking
# on '\n'
$http_error = substr($resp->error, 0, -2);
$errormsg .= "(HTTP Error: $http_error)";
}
else {
$errormsg .= "(HTTP Response: " . $resp->response_code .')';
}
}
}
else {
$errormsg = "Unable to retrieve RSS file for unknown reasons.";
}
// else fetch failed
debug("MagpieRSS fetch failed [$errormsg]");
// attempt to return cached object
if ($rss) {
if ( MAGPIE_DEBUG ) {
debug("Returning STALE object for $url");
}
return $rss;
}
// else we totally failed
error( $errormsg );
return false;
} // end if ( !MAGPIE_CACHE_ON ) {
} // end fetch_rss()
/*=======================================================================*\
Function: error
Purpose: set MAGPIE_ERROR, and trigger error
\*=======================================================================*/
function error ($errormsg, $lvl=E_USER_WARNING) {
global $MAGPIE_ERROR;
// append PHP's error message if track_errors enabled
if ( isset($php_errormsg) ) {
$errormsg .= " ($php_errormsg)";
}
if ( $errormsg ) {
$errormsg = "MagpieRSS: $errormsg";
$MAGPIE_ERROR = $errormsg;
if ( MAGPIE_DEBUG ) {
trigger_error( $errormsg, $lvl);
} else {
error_log($errormsg, 0);
}
}
}
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
trigger_error("MagpieRSS [debug] $debugmsg", $lvl);
}
/*=======================================================================*\
Function: magpie_error
Purpose: accessor for the magpie error variable
\*=======================================================================*/
function magpie_error ($errormsg="") {
global $MAGPIE_ERROR;
if ( isset($errormsg) and $errormsg ) {
$MAGPIE_ERROR = $errormsg;
}
return $MAGPIE_ERROR;
}
/*=======================================================================*\
Function: _fetch_remote_file
Purpose: retrieve an arbitrary remote file
Input: url of the remote file
headers to send along with the request (optional)
Output: an HTTP response object (see Snoopy.class.inc)
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = "" ) {
// Ensure that we have constants set up, since they are used below.
init();
// WordPress 2.7 has deprecated Snoopy. It's still there, for now, but
// I'd rather not rely on it.
if (function_exists('wp_remote_request')) :
$resp = wp_remote_request($url, array(
'headers' => $headers,
'timeout' => MAGPIE_FETCH_TIME_OUT)
);
if ( is_wp_error($resp) ) :
$error = $resp->get_error_messages();
$client = new stdClass;
$client->status = 500;
$client->response_code = 500;
$client->error = implode(" / ", $error). "\n"; //\n = Snoopy compatibility
else :
$client = new stdClass;
$client->status = $resp['response']['code'];
$client->response_code = $resp['response']['code'];
$client->headers = $resp['headers'];
$client->results = $resp['body'];
endif;
else :
// Snoopy is an HTTP client in PHP
$client = new Snoopy();
$client->agent = MAGPIE_USER_AGENT;
$client->read_timeout = MAGPIE_FETCH_TIME_OUT;
$client->use_gzip = MAGPIE_USE_GZIP;
if (is_array($headers) ) {
$client->rawheaders = $headers;
}
@$client->fetch($url);
endif;
return $client;
}
/*=======================================================================*\
Function: _response_to_rss
Purpose: parse an HTTP response object into an RSS object
Input: an HTTP response object (see Snoopy)
Output: parsed RSS object (see rss_parse)
\*=======================================================================*/
function _response_to_rss ($resp, $url = null) {
$rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING, $url );
// if RSS parsed successfully
if ( $rss and !$rss->ERROR) {
$rss->http_status = $resp->status;
// find Etag, and Last-Modified
foreach($resp->headers as $h) {
// 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
if (strpos($h, ": ")) {
list($field, $val) = explode(": ", $h, 2);
}
else {
$field = $h;
$val = "";
}
$rss->header[$field] = $val;
if ( $field == 'ETag' ) {
$rss->etag = $val;
}
if ( $field == 'Last-Modified' ) {
$rss->last_modified = $val;
}
}
return $rss;
} // else construct error message
else {
$errormsg = "Failed to parse RSS file.";
if ($rss) {
$errormsg .= " (" . $rss->ERROR . ")";
}
error($errormsg);
return false;
} // end if ($rss and !$rss->error)
}
/*=======================================================================*\
Function: init
Purpose: setup constants with default values
check for user overrides
\*=======================================================================*/
function init () {
if ( defined('MAGPIE_INITALIZED') ) {
return;
}
else {
define('MAGPIE_INITALIZED', true);
}
if ( !defined('MAGPIE_CACHE_ON') ) {
define('MAGPIE_CACHE_ON', true);
}
if ( !defined('MAGPIE_CACHE_DIR') ) {
define('MAGPIE_CACHE_DIR', './cache');
}
if ( !defined('MAGPIE_CACHE_AGE') ) {
define('MAGPIE_CACHE_AGE', 60*60); // one hour
}
if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) {
define('MAGPIE_CACHE_FRESH_ONLY', false);
}
if ( !defined('MAGPIE_OUTPUT_ENCODING') ) {
define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1');
}
if ( !defined('MAGPIE_INPUT_ENCODING') ) {
define('MAGPIE_INPUT_ENCODING', null);
}
if ( !defined('MAGPIE_DETECT_ENCODING') ) {
define('MAGPIE_DETECT_ENCODING', true);
}
if ( !defined('MAGPIE_DEBUG') ) {
define('MAGPIE_DEBUG', 0);
}
if ( !defined('MAGPIE_USER_AGENT') ) {
$ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net';
if ( MAGPIE_CACHE_ON ) {
$ua = $ua . ')';
}
else {
$ua = $ua . '; No cache)';
}
define('MAGPIE_USER_AGENT', $ua);
}
if ( !defined('MAGPIE_FETCH_TIME_OUT') ) {
define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout
}
// use gzip encoding to fetch rss files if supported?
if ( !defined('MAGPIE_USE_GZIP') ) {
define('MAGPIE_USE_GZIP', true);
}
}
// NOTE: the following code should really be in Snoopy, or at least
// somewhere other then rss_fetch!
/*=======================================================================*\
HTTP STATUS CODE PREDICATES
These functions attempt to classify an HTTP status code
based on RFC 2616 and RFC 2518.
All of them take an HTTP status code as input, and return true or false
All this code is adapted from LWP's HTTP::Status.
\*=======================================================================*/
/*=======================================================================*\
Function: is_info
Purpose: return true if Informational status code
\*=======================================================================*/
function is_info ($sc) {
return $sc >= 100 && $sc < 200;
}
/*=======================================================================*\
Function: is_success
Purpose: return true if Successful status code
\*=======================================================================*/
function is_success ($sc) {
return $sc >= 200 && $sc < 300;
}
/*=======================================================================*\
Function: is_redirect
Purpose: return true if Redirection status code
\*=======================================================================*/
function is_redirect ($sc) {
return $sc >= 300 && $sc < 400;
}
/*=======================================================================*\
Function: is_error
Purpose: return true if Error status code
\*=======================================================================*/
function is_error ($sc) {
return $sc >= 400 && $sc < 600;
}
/*=======================================================================*\
Function: is_client_error
Purpose: return true if Error status code, and its a client error
\*=======================================================================*/
function is_client_error ($sc) {
return $sc >= 400 && $sc < 500;
}
/*=======================================================================*\
Function: is_client_error
Purpose: return true if Error status code, and its a server error
\*=======================================================================*/
function is_server_error ($sc) {
return $sc >= 500 && $sc < 600;
}
################################################################################
## rss_cache.inc: from WordPress 1.5 ###########################################
################################################################################
class RSSCache {
var $BASE_CACHE = 'wp-content/cache'; // where the cache files are stored
var $MAX_AGE = 43200; // when are files stale, default twelve hours
var $ERROR = ''; // accumulate error messages
function RSSCache ($base='', $age='') {
if ( $base ) {
$this->BASE_CACHE = $base;
}
if ( $age ) {
$this->MAX_AGE = $age;
}
}
/*=======================================================================*\
Function: set
Purpose: add an item to the cache, keyed on url
Input: url from wich the rss file was fetched
Output: true on sucess
\*=======================================================================*/
function set ($url, $rss) {
global $wpdb;
$cache_option = 'rss_' . $this->file_name( $url );
$cache_timestamp = 'rss_' . $this->file_name( $url ) . '_ts';
if ( !$wpdb->get_var("SELECT option_name FROM $wpdb->options WHERE option_name = '$cache_option'") )
add_option($cache_option, '', '', 'no');
if ( !$wpdb->get_var("SELECT option_name FROM $wpdb->options WHERE option_name = '$cache_timestamp'") )
add_option($cache_timestamp, '', '', 'no');
update_option($cache_option, $rss);
update_option($cache_timestamp, time() );
return $cache_option;
}
/*=======================================================================*\
Function: get
Purpose: fetch an item from the cache
Input: url from wich the rss file was fetched
Output: cached object on HIT, false on MISS
\*=======================================================================*/
function get ($url) {
$this->ERROR = "";
$cache_option = 'rss_' . $this->file_name( $url );
if ( ! get_option( $cache_option ) ) {
$this->debug(
"Cache doesn't contain: $url (cache option: $cache_option)"
);
return 0;
}
$rss = get_option( $cache_option );
// failsafe; seems to break at odd points in WP MU
if (is_string($rss)) {
$rss = $this->unserialize($rss);
}
return $rss;
}
/*=======================================================================*\
Function: check_cache
Purpose: check a url for membership in the cache
and whether the object is older then MAX_AGE (ie. STALE)
Input: url from wich the rss file was fetched
Output: cached object on HIT, false on MISS
\*=======================================================================*/
function check_cache ( $url ) {
$this->ERROR = "";
$cache_option = $this->file_name( $url );
$cache_timestamp = 'rss_' . $this->file_name( $url ) . '_ts';
if ( $mtime = get_option($cache_timestamp) ) {
// find how long ago the file was added to the cache
// and whether that is longer then MAX_AGE
$age = time() - $mtime;
if ( $this->MAX_AGE > $age ) {
// object exists and is current
return 'HIT';
}
else {
// object exists but is old
return 'STALE';
}
}
else {
// object does not exist
return 'MISS';
}
}
/*=======================================================================*\
Function: serialize
\*=======================================================================*/
function serialize ( $rss ) {
return serialize( $rss );
}
/*=======================================================================*\
Function: unserialize
\*=======================================================================*/
function unserialize ( $data ) {
return unserialize( $data );
}
/*=======================================================================*\
Function: file_name
Purpose: map url to location in cache
Input: url from wich the rss file was fetched
Output: a file name
\*=======================================================================*/
function file_name ($url) {
return md5( $url );
}
/*=======================================================================*\
Function: error
Purpose: register error
\*=======================================================================*/
function error ($errormsg, $lvl=E_USER_WARNING) {
// append PHP's error message if track_errors enabled
if ( isset($php_errormsg) ) {
$errormsg .= " ($php_errormsg)";
}
$this->ERROR = $errormsg;
if ( MAGPIE_DEBUG ) {
trigger_error( $errormsg, $lvl);
}
else {
error_log( $errormsg, 0);
}
}
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
if ( MAGPIE_DEBUG ) {
$this->error("MagpieRSS [debug] $debugmsg", $lvl);
}
}
}
################################################################################
## rss_utils.inc: from MagpieRSS 0.8a ##########################################
################################################################################
/*======================================================================*\
Function: parse_w3cdtf
Purpose: parse a W3CDTF date into unix epoch
NOTE: http://www.w3.org/TR/NOTE-datetime
\*======================================================================*/
function parse_w3cdtf ( $date_str ) {
# regex to match wc3dtf
$pat = "/^\s*(\d{4})(-(\d{2})(-(\d{2})(T(\d{2}):(\d{2})(:(\d{2})(\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?)?)?)?\s*\$/";
if ( preg_match( $pat, $date_str, $match ) ) {
list( $year, $month, $day, $hours, $minutes, $seconds) =
array( $match[1], $match[3], $match[5], $match[7], $match[8], $match[10]);
# W3C dates can omit the time, the day of the month, or even the month.
# Fill in any blanks using information from the present moment. --CWJ
$default['hr'] = (int) gmdate('H');
$default['day'] = (int) gmdate('d');
$default['month'] = (int) gmdate('m');
if (is_null($hours)) : $hours = $default['hr']; $minutes = 0; $seconds = 0; endif;
if (is_null($day)) : $day = $default['day']; endif;
if (is_null($month)) : $month = $default['month']; endif;
# calc epoch for current date assuming GMT
$epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year);
$offset = 0;
if ( $match[15] == 'Z' ) {
# zulu time, aka GMT
}
else {
list( $tz_mod, $tz_hour, $tz_min ) =
array( $match[12], $match[13], $match[14]);
# zero out the variables
if ( ! $tz_hour ) { $tz_hour = 0; }
if ( ! $tz_min ) { $tz_min = 0; }
$offset_secs = (($tz_hour*60)+$tz_min)*60;
# is timezone ahead of GMT? then subtract offset
#
if ( $tz_mod == '+' ) {
$offset_secs = $offset_secs * -1;
}
$offset = $offset_secs;
}
$epoch = $epoch + $offset;
return $epoch;
}
else {
return -1;
}
}
# Relative URI static class: PHP class for resolving relative URLs
#
# This class is derived (under the terms of the GPL) from URL Class 0.3 by
# Keyvan Minoukadeh , which is great but more than we need
# for MagpieRSS's purposes. The class has been stripped down to a single
# public method: Relative_URI::resolve($url, $base), which resolves the URI in
# $url relative to the URI in $base
#
# FeedWordPress also uses this class. So if we have it loaded in, don't load it
# again.
#
# -- Charles Johnson
if (!class_exists('Relative_URI')) {
class Relative_URI
{
// Resolve relative URI in $url against the base URI in $base. If $base
// is not supplied, then we use the REQUEST_URI of this script.
//
// I'm hoping this method reflects RFC 2396 Section 5.2
function resolve ($url, $base = NULL)
{
if (is_null($base)):
$base = 'http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI'];
endif;
$base = Relative_URI::_encode(trim($base));
$uri_parts = Relative_URI::_parse_url($base);
$url = Relative_URI::_encode(trim($url));
$parts = Relative_URI::_parse_url($url);
$uri_parts['fragment'] = (isset($parts['fragment']) ? $parts['fragment'] : null);
$uri_parts['query'] = (isset($parts['query']) ? $parts['query'] : null);
// if path is empty, and scheme, host, and query are undefined,
// the URL is referring the base URL
if (($parts['path'] == '') && !isset($parts['scheme']) && !isset($parts['host']) && !isset($parts['query'])) {
// If the URI is empty or only a fragment, return the base URI
return $base . (isset($parts['fragment']) ? '#'.$parts['fragment'] : '');
} elseif (isset($parts['scheme'])) {
// If the scheme is set, then the URI is absolute.
return $url;
} elseif (isset($parts['host'])) {
$uri_parts['host'] = $parts['host'];
$uri_parts['path'] = $parts['path'];
} else {
// We have a relative path but not a host.
// start ugly fix:
// prepend slash to path if base host is set, base path is not set, and url path is not absolute
if ($uri_parts['host'] && ($uri_parts['path'] == '')
&& (strlen($parts['path']) > 0)
&& (substr($parts['path'], 0, 1) != '/')) {
$parts['path'] = '/'.$parts['path'];
} // end ugly fix
if (substr($parts['path'], 0, 1) == '/') {
$uri_parts['path'] = $parts['path'];
} else {
// copy base path excluding any characters after the last (right-most) slash character
$buffer = substr($uri_parts['path'], 0, (int)strrpos($uri_parts['path'], '/')+1);
// append relative path
$buffer .= $parts['path'];
// remove "./" where "." is a complete path segment.
$buffer = str_replace('/./', '/', $buffer);
if (substr($buffer, 0, 2) == './') {
$buffer = substr($buffer, 2);
}
// if buffer ends with "." as a complete path segment, remove it
if (substr($buffer, -2) == '/.') {
$buffer = substr($buffer, 0, -1);
}
// remove "/../" where is a complete path segment not equal to ".."
$search_finished = false;
$segment = explode('/', $buffer);
while (!$search_finished) {
for ($x=0; $x+1 < count($segment);) {
if (($segment[$x] != '') && ($segment[$x] != '..') && ($segment[$x+1] == '..')) {
if ($x+2 == count($segment)) $segment[] = '';
unset($segment[$x], $segment[$x+1]);
$segment = array_values($segment);
continue 2;
} else {
$x++;
}
}
$search_finished = true;
}
$buffer = (count($segment) == 1) ? '/' : implode('/', $segment);
$uri_parts['path'] = $buffer;
}
}
// If we've gotten to this point, we can try to put the pieces
// back together.
$ret = '';
if (isset($uri_parts['scheme'])) $ret .= $uri_parts['scheme'].':';
if (isset($uri_parts['user'])) {
$ret .= $uri_parts['user'];
if (isset($uri_parts['pass'])) $ret .= ':'.$uri_parts['parts'];
$ret .= '@';
}
if (isset($uri_parts['host'])) {
$ret .= '//'.$uri_parts['host'];
if (isset($uri_parts['port'])) $ret .= ':'.$uri_parts['port'];
}
$ret .= $uri_parts['path'];
if (isset($uri_parts['query'])) $ret .= '?'.$uri_parts['query'];
if (isset($uri_parts['fragment'])) $ret .= '#'.$uri_parts['fragment'];
return $ret;
}
/**
* Parse URL
*
* Regular expression grabbed from RFC 2396 Appendix B.
* This is a replacement for PHPs builtin parse_url().
* @param string $url
* @access private
* @return array
*/
function _parse_url($url)
{
// I'm using this pattern instead of parse_url() as there's a few strings where parse_url()
// generates a warning.
if (preg_match('!^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?!', $url, $match)) {
$parts = array();
if ($match[1] != '') $parts['scheme'] = $match[2];
if ($match[3] != '') $parts['auth'] = $match[4];
// parse auth
if (isset($parts['auth'])) {
// store user info
if (($at_pos = strpos($parts['auth'], '@')) !== false) {
$userinfo = explode(':', substr($parts['auth'], 0, $at_pos), 2);
$parts['user'] = $userinfo[0];
if (isset($userinfo[1])) $parts['pass'] = $userinfo[1];
$parts['auth'] = substr($parts['auth'], $at_pos+1);
}
// get port number
if ($port_pos = strrpos($parts['auth'], ':')) {
$parts['host'] = substr($parts['auth'], 0, $port_pos);
$parts['port'] = (int)substr($parts['auth'], $port_pos+1);
if ($parts['port'] < 1) $parts['port'] = null;
} else {
$parts['host'] = $parts['auth'];
}
}
unset($parts['auth']);
$parts['path'] = $match[5];
if (isset($match[6]) && ($match[6] != '')) $parts['query'] = $match[7];
if (isset($match[8]) && ($match[8] != '')) $parts['fragment'] = $match[9];
return $parts;
}
// shouldn't reach here
return array('path'=>'');
}
function _encode($string)
{
static $replace = array();
if (!count($replace)) {
$find = array(32, 34, 60, 62, 123, 124, 125, 91, 92, 93, 94, 96, 127);
$find = array_merge(range(0, 31), $find);
$find = array_map('chr', $find);
foreach ($find as $char) {
$replace[$char] = '%'.bin2hex($char);
}
}
// escape control characters and a few other characters
$encoded = strtr($string, $replace);
// remove any character outside the hex range: 21 - 7E (see www.asciitable.com)
return preg_replace('/[^\x21-\x7e]/', '', $encoded);
}
} // class Relative_URI
}
################################################################################
## WordPress: wp_rss(), get_rss() ##############################################
################################################################################
function wp_rss ($url, $num) {
//ini_set("display_errors", false); uncomment to suppress php errors thrown if the feed is not returned.
$num_items = $num;
$rss = fetch_rss($url);
if ( $rss ) {
echo "";
}
else {
echo "an error has occured the feed is probably down, try again later.";
}
}
function get_rss ($uri, $num = 5) { // Like get posts, but for RSS
$rss = fetch_rss($url);
if ( $rss ) {
$rss->items = array_slice($rss->items, 0, $num_items);
foreach ($rss->items as $item ) {
echo "\n";
echo "";
echo htmlentities($item['title']);
echo "
\n";
echo "\n";
}
return $posts;
} else {
return false;
}
}
?>