[44] | 1 | <?php
|
---|
| 2 | /**
|
---|
| 3 | * Atom Syndication Format PHP Library
|
---|
| 4 | *
|
---|
| 5 | * @package AtomLib
|
---|
| 6 | * @link http://code.google.com/p/phpatomlib/
|
---|
| 7 | *
|
---|
| 8 | * @author Elias Torres <elias@torrez.us>
|
---|
| 9 | * @version 0.4
|
---|
| 10 | * @since 2.3
|
---|
| 11 | */
|
---|
| 12 |
|
---|
/**
 * Structure that stores common Atom Feed Properties
 *
 * Only the collection-valued properties are declared here. Scalar and
 * content elements (title, id, updated, ...) are attached by AtomParser as
 * dynamically named properties, which is why dynamic properties must stay
 * enabled for this class.
 *
 * @package AtomLib
 */
#[AllowDynamicProperties]
class AtomFeed {
    /**
     * Stores Links
     *
     * One attribute array per link element seen at feed level.
     *
     * @var array
     * @access public
     */
    public $links = array();

    /**
     * Stores Categories
     *
     * One attribute array per category element seen at feed level.
     *
     * @var array
     * @access public
     */
    public $categories = array();

    /**
     * Stores Entries
     *
     * AtomEntry objects, appended in document order.
     *
     * @var array
     * @access public
     */
    public $entries = array();
}
|
---|
| 39 |
|
---|
/**
 * Structure that stores Atom Entry Properties
 *
 * Only the collection-valued properties are declared here. Scalar and
 * content elements (title, content, id, ...) are attached by AtomParser as
 * dynamically named properties, which is why dynamic properties must stay
 * enabled for this class.
 *
 * @package AtomLib
 */
#[AllowDynamicProperties]
class AtomEntry {
    /**
     * Stores Links
     *
     * One attribute array per link element seen inside the entry.
     *
     * @var array
     * @access public
     */
    public $links = array();

    /**
     * Stores Categories
     *
     * One attribute array per category element seen inside the entry.
     *
     * @var array
     * @access public
     */
    public $categories = array();
}
|
---|
| 59 |
|
---|
/**
 * AtomLib Atom Parser API
 *
 * Reads an Atom document from $FILE with PHP's namespace-aware XML parser
 * and collects the result in $feed (an AtomFeed whose $entries holds one
 * AtomEntry per entry element). Markup found inside content-like elements is
 * re-serialised to a string, with namespace prefixes resolved.
 *
 * @package AtomLib
 */
class AtomParser {

    /** @var string Atom namespace URI used to recognise feed/entry elements. */
    public $NS = 'http://www.w3.org/2005/Atom';
    /** @var array Elements stored as array(type, serialized content). */
    public $ATOM_CONTENT_ELEMENTS = array('content','summary','title','subtitle','rights');
    /** @var array Elements stored as a plain string. */
    public $ATOM_SIMPLE_ELEMENTS = array('id','updated','published','draft');

    /** @var bool When true, _p() prints a trace and parse() keeps the raw input in $content. */
    public $debug = false;

    /** @var int Current element nesting depth. */
    public $depth = 0;
    /** @var int Spaces per depth level in debug output. */
    public $indent = 2;
    /** @var array|null Pieces of the element being collected: tag records array(tag, depth, text) and raw text chunks. */
    public $in_content;
    /** @var array Stack (innermost first) of namespace declarations, each array(prefix, uri). */
    public $ns_contexts = array();
    /** @var array Declarations reported since the last start tag. */
    public $ns_decls = array();
    /** @var array Namespace mappings that must be declared on the content element being serialised. */
    public $content_ns_decls = array();
    /** @var array Namespace mappings already declared inside serialised content. */
    public $content_ns_contexts = array();
    public $is_xhtml = false;
    public $is_html = false;
    public $is_text = true;
    public $skipped_div = false;

    /** @var string Stream or path that parse() reads from. */
    public $FILE = "php://input";

    /** @var AtomFeed Parse result. */
    public $feed;
    /** @var AtomFeed|AtomEntry|null Object that currently receives parsed properties. */
    public $current;
    /** @var callable Formats one attribute as name="value". */
    public $map_attrs_func;
    /** @var callable Formats one namespace mapping as an xmlns declaration. */
    public $map_xmlns_func;
    /** @var string|null Text of the last PHP error raised while parse() was running. */
    public $error;
    /** @var string Raw input, accumulated only when $debug is on. */
    public $content;

    /**
     * Creates the empty result feed and the serialisation callbacks.
     */
    function __construct() {
        $this->feed = new AtomFeed();
        $this->current = null;
        // create_function() no longer exists in PHP 8; static methods do the same job.
        $this->map_attrs_func = array(__CLASS__, 'map_attrs');
        $this->map_xmlns_func = array(__CLASS__, 'map_xmlns');
    }

    /**
     * PHP4-style constructor, kept so existing explicit calls keep working.
     */
    public function AtomParser() {
        $this->__construct();
    }

    /**
     * Formats one attribute for serialised content.
     *
     * @param string $k Attribute name (already prefixed).
     * @param string $v Attribute value (already escaped).
     * @return string
     */
    public static function map_attrs($k, $v) {
        return "$k=\"$v\"";
    }

    /**
     * Formats one namespace mapping as an xmlns declaration.
     *
     * @param mixed $p Array key of the mapping (unused).
     * @param array $n Mapping as array(prefix, uri); an empty prefix means the default namespace.
     * @return string
     */
    public static function map_xmlns($p, $n) {
        $xd = "xmlns";
        if (strlen($n[0]) > 0) {
            $xd .= ":{$n[0]}";
        }
        return "{$xd}=\"{$n[1]}\"";
    }

    /**
     * Prints a debug message indented by the current depth (only when $debug is on).
     *
     * @param string $msg
     */
    function _p($msg) {
        if ($this->debug) {
            print str_repeat(" ", $this->depth * $this->indent) . $msg ."\n";
        }
    }

    /**
     * Error handler installed for the duration of parse(); remembers the last message.
     */
    function error_handler($log_level, $log_text, $error_file, $error_line) {
        $this->error = $log_text;
    }

    /**
     * Parses the document at $FILE into $feed.
     *
     * @return bool False when the input cannot be opened or is not well-formed
     *              XML (the message is then available in $error); true otherwise.
     */
    function parse() {

        set_error_handler(array($this, 'error_handler'));

        array_unshift($this->ns_contexts, array());

        $parser = xml_parser_create_ns();
        // Array callables bind the handlers to this object without xml_set_object().
        xml_set_element_handler($parser, array($this, 'start_element'), array($this, 'end_element'));
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
        xml_set_character_data_handler($parser, array($this, 'cdata'));
        xml_set_default_handler($parser, array($this, '_default'));
        xml_set_start_namespace_decl_handler($parser, array($this, 'start_ns'));
        xml_set_end_namespace_decl_handler($parser, array($this, 'end_ns'));

        $this->content = '';

        $ret = true;

        $fp = fopen($this->FILE, "r");
        if ($fp === false) {
            // fopen() has already raised a warning, which error_handler() stored in $error.
            $ret = false;
        } else {
            while ($data = fread($fp, 4096)) {
                if ($this->debug) $this->content .= $data;

                if (!xml_parse($parser, $data, feof($fp))) {
                    trigger_error(sprintf($this->_translate('XML error: %s at line %d')."\n",
                        xml_error_string(xml_get_error_code($parser)),
                        xml_get_current_line_number($parser)));
                    $ret = false;
                    break;
                }
            }
            fclose($fp);
        }

        // Parser handles are objects from PHP 8.0 on; explicit freeing is only needed before that.
        if (PHP_VERSION_ID < 80000) {
            xml_parser_free($parser);
        }
        unset($parser);

        restore_error_handler();

        return $ret;
    }

    /**
     * Returns the translation of $text when a __() function is available,
     * otherwise $text itself, so the class also works without one.
     *
     * @param string $text
     * @return string
     */
    function _translate($text) {
        return function_exists('__') ? __($text) : $text;
    }

    /**
     * Start-tag handler.
     *
     * @param mixed  $parser XML parser handle.
     * @param string $name   Element name as "namespace-uri:local-name".
     * @param array  $attrs  Attribute name => value.
     */
    function start_element($parser, $name, $attrs) {

        $name_parts = explode(":", $name);
        $tag = array_pop($name_parts);

        switch ($name) {
            case $this->NS . ':feed':
                $this->current = $this->feed;
                break;
            case $this->NS . ':entry':
                $this->current = new AtomEntry();
                break;
        }

        $this->_p("start_element('$name')");

        // Declarations reported just before this tag become the innermost context.
        array_unshift($this->ns_contexts, $this->ns_decls);

        $this->depth++;

        if (!empty($this->in_content)) {
            // Markup nested inside a content element: serialise the start tag.

            $this->content_ns_decls = array();

            if ($this->is_html || $this->is_text)
                trigger_error("Invalid content in element found. Content must not be of type text or html if it contains markup.");

            $attrs_prefix = array();

            // resolve prefixes for attributes
            foreach ($attrs as $key => $value) {
                $with_prefix = $this->ns_to_prefix($key, true);
                $attrs_prefix[$with_prefix[1]] = $this->xml_escape($value);
            }

            $attrs_str = join(' ', array_map($this->map_attrs_func, array_keys($attrs_prefix), array_values($attrs_prefix)));
            if (strlen($attrs_str) > 0) {
                $attrs_str = " " . $attrs_str;
            }

            $with_prefix = $this->ns_to_prefix($name);

            // A null mapping means no namespace applies, so there is nothing to declare.
            if ($with_prefix[0] !== null && !$this->is_declared_content_ns($with_prefix[0])) {
                array_push($this->content_ns_decls, $with_prefix[0]);
            }

            $xmlns_str = '';
            if (count($this->content_ns_decls) > 0) {
                array_unshift($this->content_ns_contexts, $this->content_ns_decls);
                $xmlns_str .= join(' ', array_map($this->map_xmlns_func, array_keys($this->content_ns_contexts[0]), array_values($this->content_ns_contexts[0])));
                if (strlen($xmlns_str) > 0) {
                    $xmlns_str = " " . $xmlns_str;
                }
            }

            array_push($this->in_content, array($tag, $this->depth, "<". $with_prefix[1] ."{$xmlns_str}{$attrs_str}" . ">"));

        } else if (in_array($tag, $this->ATOM_CONTENT_ELEMENTS) || in_array($tag, $this->ATOM_SIMPLE_ELEMENTS)) {
            $this->in_content = array();

            // The type attribute is optional; a missing one means plain text.
            $type_attr = array_key_exists('type', $attrs) ? $attrs['type'] : null;
            $this->is_xhtml = $type_attr === 'xhtml';
            $this->is_html = $type_attr === 'html' || $type_attr === 'text/html';
            $this->is_text = $type_attr === null || $type_attr === 'text';
            $type = $this->is_xhtml ? 'XHTML' : ($this->is_html ? 'HTML' : ($this->is_text ? 'TEXT' : $type_attr));

            if (array_key_exists('src', $attrs)) {
                // Out-of-line content: keep the attributes instead of collecting a body.
                if ($this->current !== null) {
                    $this->current->$tag = $attrs;
                }
            } else {
                array_push($this->in_content, array($tag, $this->depth, $type));
            }
        } else if ($tag == 'link') {
            // Elements outside any feed/entry have no target object and are dropped.
            if ($this->current !== null) {
                array_push($this->current->links, $attrs);
            }
        } else if ($tag == 'category') {
            if ($this->current !== null) {
                array_push($this->current->categories, $attrs);
            }
        }

        $this->ns_decls = array();
    }

    /**
     * End-tag handler.
     *
     * @param mixed  $parser XML parser handle.
     * @param string $name   Element name as "namespace-uri:local-name".
     */
    function end_element($parser, $name) {

        $name_parts = explode(":", $name);
        $tag = array_pop($name_parts);

        # if we are *in* content, then let's proceed to serialize it
        if (!empty($this->in_content)) {
            $ccount = count($this->in_content);
            $last = $this->in_content[$ccount - 1];

            # if we are ending the original content element
            # then let's finalize the content
            if ($this->in_content[0][0] === $tag &&
                $this->in_content[0][1] === $this->depth) {
                $origtype = $this->in_content[0][2];
                array_shift($this->in_content);
                $newcontent = array();
                foreach ($this->in_content as $c) {
                    if (is_array($c) && count($c) == 3) {
                        // Tag record: its third item is the serialised tag.
                        array_push($newcontent, $c[2]);
                    } else {
                        // Text chunk.
                        if ($this->is_xhtml || $this->is_text) {
                            array_push($newcontent, $this->xml_escape($c));
                        } else {
                            array_push($newcontent, $c);
                        }
                    }
                }
                if ($this->current !== null) {
                    if (in_array($tag, $this->ATOM_CONTENT_ELEMENTS)) {
                        $this->current->$tag = array($origtype, join('', $newcontent));
                    } else {
                        $this->current->$tag = join('', $newcontent);
                    }
                }
                $this->in_content = array();
            } else if (is_array($last) && $last[0] === $tag && $last[1] === $this->depth) {
                // The start tag of this very element is the last piece, so the
                // element is empty: turn "<x ...>" into "<x .../>". Text chunks
                // must never be mistaken for a tag record here.
                $this->in_content[$ccount - 1][2] = substr($last[2], 0, -1) . "/>";
            } else {
                # else, just finalize the current element's content
                $endtag = $this->ns_to_prefix($name);
                array_push($this->in_content, array($tag, $this->depth, "</$endtag[1]>"));
            }
        }

        array_shift($this->ns_contexts);

        $this->depth--;

        if ($name == ($this->NS . ':entry')) {
            array_push($this->feed->entries, $this->current);
            $this->current = null;
        }

        $this->_p("end_element('$name')");
    }

    /**
     * Namespace-declaration handler; queues the mapping for the next start tag.
     *
     * @param mixed  $parser XML parser handle.
     * @param mixed  $prefix Declared prefix (empty for the default namespace).
     * @param string $uri    Namespace URI.
     */
    function start_ns($parser, $prefix, $uri) {
        // Normalise so the default namespace is always stored with prefix ''.
        $prefix = (string) $prefix;
        $this->_p("starting: " . $prefix . ":" . $uri);
        array_push($this->ns_decls, array($prefix, $uri));
    }

    /**
     * End-of-namespace-scope handler; only traces.
     */
    function end_ns($parser, $prefix) {
        $this->_p("ending: #" . $prefix . "#");
    }

    /**
     * Character-data handler; keeps text only while a content element is being collected.
     *
     * @param mixed  $parser XML parser handle.
     * @param string $data   Text chunk.
     */
    function cdata($parser, $data) {
        $this->_p("data: #" . str_replace(array("\n"), array("\\n"), trim($data)) . "#");
        if (!empty($this->in_content)) {
            array_push($this->in_content, $data);
        }
    }

    /**
     * Default handler; intentionally ignores whatever it receives.
     */
    function _default($parser, $data) {
    }

    /**
     * Resolves an expanded name to the prefixed form used in serialised content.
     *
     * @param string $qname Name as "namespace-uri:local-name", or a bare local name.
     * @param bool   $attr  True when resolving an attribute name; attributes
     *                      never fall back to the default namespace.
     * @return array array(mapping, name): mapping is the array(prefix, uri)
     *               that applied, or null when none did; name is the local
     *               name, prefixed when a non-empty prefix matched.
     */
    function ns_to_prefix($qname, $attr=false) {
        # split 'http://www.w3.org/1999/xhtml:div' into ('http','//www.w3.org/1999/xhtml','div')
        $components = explode(":", $qname);

        # grab the last one (e.g 'div')
        $name = array_pop($components);

        if (!empty($components)) {
            # re-join back the namespace component
            $ns = join(":", $components);
            foreach ($this->ns_contexts as $context) {
                foreach ($context as $mapping) {
                    if ($mapping[1] == $ns && strlen($mapping[0]) > 0) {
                        return array($mapping, "$mapping[0]:$name");
                    }
                }
            }
        }

        if (!$attr) {
            // Elements fall back to the innermost default-namespace declaration.
            foreach ($this->ns_contexts as $context) {
                foreach ($context as $mapping) {
                    if (strlen($mapping[0]) == 0) {
                        return array($mapping, $name);
                    }
                }
            }
        }

        // No applicable mapping: always hand back a well-formed pair.
        return array(null, $name);
    }

    /**
     * Tells whether a namespace mapping was already declared inside the serialised content.
     *
     * @param array $new_mapping Mapping as array(prefix, uri).
     * @return bool
     */
    function is_declared_content_ns($new_mapping) {
        foreach ($this->content_ns_contexts as $context) {
            foreach ($context as $mapping) {
                if ($new_mapping == $mapping) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Escapes the five XML special characters.
     *
     * The ampersand is replaced first so the entities produced for the other
     * characters are not escaped a second time.
     *
     * @param string $string
     * @return string
     */
    function xml_escape($string)
    {
        return str_replace(array('&','"',"'",'<','>'),
            array('&amp;','&quot;','&apos;','&lt;','&gt;'),
            $string );
    }
}
|
---|
| 353 |
|
---|
| 354 | ?>
|
---|