source: trunk/ 44

Last change on this file since 44 was 44, checked in by luciano, 14 years ago
File size: 42.4 KB
2class SyndicatedPost {
3 var $item = null;
5 var $link = null;
6 var $feed = null;
7 var $feedmeta = null;
9 var $post = array ();
11 var $_freshness = null;
12 var $_wp_id = null;
/**
 * SyndicatedPost::SyndicatedPost() -- PHP 4-style constructor. Converts one
 * parsed feed item into the array representation of a post ($this->post),
 * passing each piece through its corresponding `syndicated_item_*` filter.
 *
 * If a `syndicated_item` filter returns NULL, processing stops immediately
 * and $this->post is set to NULL.
 *
 * @param array $item The parsed feed item, indexed by element name.
 * @param object $link The feed link object. Its `settings` and `magpie`
 *	properties and its syndicated_status() method are used here.
 */
function SyndicatedPost ($item, $link) {
	$this->link = $link;
	$feedmeta = $link->settings;
	$feed = $link->magpie;

	# The channel and the feed settings are exposed through globals because
	# apply_filters()'s support for a variable argument count was broken in
	# WordPress 1.5, and WP 1.5 compatibility is a goal across the board.
	global $fwp_channel, $fwp_feedmeta;
	$fwp_channel = $feed; $fwp_feedmeta = $feedmeta;

	$this->feed = $feed;
	$this->feedmeta = $feedmeta;

	$this->item = $item;
	$this->item = apply_filters('syndicated_item', $this->item, $this);

	# Filters can halt further processing by returning NULL
	if (is_null($this->item)) :
		$this->post = NULL;
	else :
		# Note that nothing is run through $wpdb->escape() here.
		# That's deliberate. The escaping is done at the point
		# of insertion, not here, to avoid double-escaping and
		# to avoid interfering with syndicated_post filters

		$this->post['post_title'] = apply_filters('syndicated_item_title', $this->item['title'], $this);

		// This just gives us an alphanumeric representation of
		// the author. The numeric ID for the author is looked up
		// (or created) later, when the post is stored.
		$this->post['named']['author'] = apply_filters('syndicated_item_author', $this->author(), $this);

		# Identify content and sanitize it.
		# ---------------------------------
		if (isset($this->item['atom_content'])) :
			$content = $this->item['atom_content'];
		elseif (isset($this->item['xhtml']['body'])) :
			$content = $this->item['xhtml']['body'];
		elseif (isset($this->item['xhtml']['div'])) :
			$content = $this->item['xhtml']['div'];
		elseif (isset($this->item['content']['encoded']) and $this->item['content']['encoded']):
			$content = $this->item['content']['encoded'];
		else:
			// Items without any description yield NULL content
			$content = (isset($this->item['description']) ? $this->item['description'] : NULL);
		endif;
		$this->post['post_content'] = apply_filters('syndicated_item_content', $content, $this);

		# Identify and sanitize excerpt: prefer the item's description;
		# otherwise use the tag-stripped content, cut down to 255 bytes.
		$excerpt = NULL;
		if ( isset($this->item['description']) and $this->item['description'] ) :
			$excerpt = $this->item['description'];
		elseif ( isset($content) and $content ) :
			$excerpt = strip_tags($content);
			if (strlen($excerpt) > 255) :
				$excerpt = substr($excerpt,0,252).'...';
			endif;
		endif;
		$excerpt = apply_filters('syndicated_item_excerpt', $excerpt, $this);

		if (!is_null($excerpt)):
			$this->post['post_excerpt'] = $excerpt;
		endif;

		// This is unnecessary if we use wp_insert_post
		if (!$this->use_api('wp_insert_post')) :
			$this->post['post_name'] = sanitize_title($this->post['post_title']);
		endif;

		$this->post['epoch']['issued'] = apply_filters('syndicated_item_published', $this->published(), $this);
		$this->post['epoch']['created'] = apply_filters('syndicated_item_created', $this->created(), $this);
		$this->post['epoch']['modified'] = apply_filters('syndicated_item_updated', $this->updated(), $this);

		// Local timestamps are the GMT timestamps shifted by the blog's
		// configured offset. The (int) cast has to apply to the product,
		// not to the option itself, or a fractional offset such as 5.5
		// hours loses its half hour.
		$offset = (int) round(((float) get_option('gmt_offset')) * 60 * 60);
		$this->post['post_date'] = gmdate('Y-m-d H:i:s', $this->published() + $offset);
		$this->post['post_modified'] = gmdate('Y-m-d H:i:s', $this->updated() + $offset);
		$this->post['post_date_gmt'] = gmdate('Y-m-d H:i:s', $this->published());
		$this->post['post_modified_gmt'] = gmdate('Y-m-d H:i:s', $this->updated());

		// Use feed-level preferences or the global default.
		$this->post['post_status'] = $this->link->syndicated_status('post', 'publish');
		$this->post['comment_status'] = $this->link->syndicated_status('comment', 'closed');
		$this->post['ping_status'] = $this->link->syndicated_status('ping', 'closed');

		// Unique ID (hopefully a unique tag: URI); failing that, the permalink
		$this->post['guid'] = apply_filters('syndicated_item_guid', $this->guid(), $this);

		// User-supplied custom settings to apply to each post. Do first so
		// that FWP-generated custom settings will overwrite if necessary.
		$default_custom_settings = get_option('feedwordpress_custom_settings');
		if ($default_custom_settings) :
			$default_custom_settings = unserialize($default_custom_settings);
		endif;
		if (!is_array($default_custom_settings)) :
			$default_custom_settings = array();
		endif;

		$custom_settings = (isset($this->link->settings['postmeta']) ? $this->link->settings['postmeta'] : null);
		if ($custom_settings) :
			$custom_settings = unserialize($custom_settings);
		endif;
		if (!is_array($custom_settings)) :
			$custom_settings = array();
		endif;
		$this->post['meta'] = array_merge($default_custom_settings, $custom_settings);

		// RSS 2.0 / Atom 1.0 enclosure support
		if ( isset($this->item['enclosure#']) ) :
			for ($i = 1; $i <= $this->item['enclosure#']; $i++) :
				// The first enclosure's keys carry no suffix; the
				// following ones are suffixed "#2", "#3", ...
				$eid = (($i > 1) ? "#{$i}" : "");
				$this->post['meta']['enclosure'][] =
					apply_filters('syndicated_item_enclosure_url', $this->item["enclosure{$eid}@url"], $this)."\n".
					apply_filters('syndicated_item_enclosure_length', $this->item["enclosure{$eid}@length"], $this)."\n".
					apply_filters('syndicated_item_enclosure_type', $this->item["enclosure{$eid}@type"], $this);
			endfor;
		endif;

		// In case you want to point back to the blog this was syndicated from
		if (isset($this->feed->channel['title'])) :
			$this->post['meta']['syndication_source'] = apply_filters('syndicated_item_source_title', $this->feed->channel['title'], $this);
		endif;

		if (isset($this->feed->channel['link'])) :
			$this->post['meta']['syndication_source_uri'] = apply_filters('syndicated_item_source_link', $this->feed->channel['link'], $this);
		endif;

		// Make use of atom:source data, if present in an aggregated feed
		if (isset($this->item['source_title'])) :
			$this->post['meta']['syndication_source_original'] = $this->item['source_title'];
		endif;

		if (isset($this->item['source_link'])) :
			$this->post['meta']['syndication_source_uri_original'] = $this->item['source_link'];
		endif;

		if (isset($this->item['source_id'])) :
			$this->post['meta']['syndication_source_id_original'] = $this->item['source_id'];
		endif;

		// Store information on human-readable and machine-readable comment URIs
		if (isset($this->item['comments'])) :
			$this->post['meta']['rss:comments'] = apply_filters('syndicated_item_comments', $this->item['comments']);
		endif;
		if (isset($this->item['wfw']['commentrss'])) :
			$this->post['meta']['wfw:commentRSS'] = apply_filters('syndicated_item_commentrss', $this->item['wfw']['commentrss']);
		endif;

		// Store information to identify the feed that this came from
		$this->post['meta']['syndication_feed'] = $this->feedmeta['link/uri'];
		$this->post['meta']['syndication_feed_id'] = $this->feedmeta['link/id'];

		if (isset($this->item['source_link_self'])) :
			$this->post['meta']['syndication_feed_original'] = $this->item['source_link_self'];
		endif;

		// In case you want to know the external permalink...
		$this->post['meta']['syndication_permalink'] = apply_filters('syndicated_item_link', $this->item['link']);

		// Store a hash of the post content for checking whether something needs to be updated
		$this->post['meta']['syndication_item_hash'] = $this->update_hash();

		// Feed-by-feed options for author and category creation
		$this->post['named']['unfamiliar']['author'] = (isset($this->feedmeta['unfamiliar author']) ? $this->feedmeta['unfamiliar author'] : null);
		$this->post['named']['unfamiliar']['category'] = (isset($this->feedmeta['unfamiliar category']) ? $this->feedmeta['unfamiliar category'] : null);

		// Categories: start with default categories, if any
		$fc = get_option("feedwordpress_syndication_cats");
		if ($fc) :
			$this->post['named']['preset/category'] = explode("\n", $fc);
		else :
			$this->post['named']['preset/category'] = array();
		endif;

		if (isset($this->feedmeta['cats']) and is_array($this->feedmeta['cats'])) :
			$this->post['named']['preset/category'] = array_merge($this->post['named']['preset/category'], $this->feedmeta['cats']);
		endif;

		// Now add categories from the post, if we have 'em
		$this->post['named']['category'] = array();
		if ( isset($this->item['category#']) ) :
			for ($i = 1; $i <= $this->item['category#']; $i++) :
				$cat_idx = (($i > 1) ? "#{$i}" : "");
				$cat = $this->item["category{$cat_idx}"];

				// A feed may define a regular expression on which each
				// category element is split into several categories.
				if ( isset($this->feedmeta['cat_split']) and strlen($this->feedmeta['cat_split']) > 0) :
					$pcre = "\007".$this->feedmeta['cat_split']."\007";
					$this->post['named']['category'] = array_merge($this->post['named']['category'], preg_split($pcre, $cat, -1 /*=no limit*/, PREG_SPLIT_NO_EMPTY));
				else :
					$this->post['named']['category'][] = $cat;
				endif;
			endfor;
		endif;
		$this->post['named']['category'] = apply_filters('syndicated_item_categories', $this->post['named']['category'], $this);

		// Tags: start with default tags, if any
		$ft = get_option("feedwordpress_syndication_tags");
		if ($ft) :
			$this->post['tags_input'] = explode(FEEDWORDPRESS_CAT_SEPARATOR, $ft);
		else :
			$this->post['tags_input'] = array();
		endif;

		if (isset($this->feedmeta['tags']) and is_array($this->feedmeta['tags'])) :
			$this->post['tags_input'] = array_merge($this->post['tags_input'], $this->feedmeta['tags']);
		endif;
	endif;
} // SyndicatedPost::SyndicatedPost()
/**
 * SyndicatedPost::filtered() -- reports whether this item has been dropped,
 * i.e. whether $this->post has been set to NULL.
 *
 * @return bool TRUE if $this->post is NULL, FALSE otherwise
 */
function filtered () {
	return (NULL === $this->post);
}
/**
 * SyndicatedPost::freshness() -- compares this item against the posts
 * table (matching on guid) to decide whether it is new, updated, or
 * unchanged. The answer is computed once and cached in $this->_freshness.
 * When a stored post with the same guid exists, its ID is recorded in
 * $this->_wp_id as a side effect.
 *
 * @return int 2 = new content, 1 = updated content, 0 = unchanged
 */
function freshness () {
	global $wpdb;

	if ($this->filtered()) : // This should never happen.
		FeedWordPress::critical_bug('SyndicatedPost', $this, __LINE__);
	endif;

	if (is_null($this->_freshness)) :
		$guid = $wpdb->escape($this->guid());

		$result = $wpdb->get_row("
		SELECT id, guid, post_modified_gmt
		FROM $wpdb->posts WHERE guid='$guid'
		");

		if (!$result) :
			$this->_freshness = 2; // New content
		else:
			// get_post_custom_values() may hand back a non-array when the
			// post has no stored hash, so check the type before counting.
			$stored_update_hashes = get_post_custom_values('syndication_item_hash', $result->id);
			if (is_array($stored_update_hashes) and count($stored_update_hashes) > 0) :
				$stored_update_hash = $stored_update_hashes[0];
				$update_hash_changed = ($stored_update_hash != $this->update_hash());
			else :
				$update_hash_changed = false;
			endif;

			// Turn the stored "Y-m-d H:i:s" GMT string back into a timestamp
			preg_match('/([0-9]+)-([0-9]+)-([0-9]+) ([0-9]+):([0-9]+):([0-9]+)/', $result->post_modified_gmt, $backref);

			$last_rev_ts = gmmktime($backref[4], $backref[5], $backref[6], $backref[2], $backref[3], $backref[1]);
			$updated_ts = $this->updated(/*fallback=*/ true, /*default=*/ NULL);

			// Updated if the feed reports a later modification time than
			// the one we stored, or if the content hash has changed.
			$updated = ((
				!is_null($updated_ts)
				and ($updated_ts > $last_rev_ts)
			) or $update_hash_changed);

			$this->_freshness = ($updated ? 1 /* updated content */ : 0 /* same old, same old */);
			$this->_wp_id = $result->id;
		endif;
	endif;
	return $this->_freshness;
}
/**
 * SyndicatedPost::wp_id() -- returns the database ID recorded for this
 * item in $this->_wp_id. If neither the ID nor the freshness has been
 * determined yet, freshness() is run first, since it records the ID of a
 * matching stored post.
 *
 * @return mixed The value of $this->_wp_id (NULL if none has been recorded)
 */
function wp_id () {
	if ($this->filtered()) { // This should never happen.
		FeedWordPress::critical_bug('SyndicatedPost', $this, __LINE__);
	}

	$not_yet_looked_up = (is_null($this->_wp_id) and is_null($this->_freshness));
	if ($not_yet_looked_up) {
		// Called for its side effect: sets the WP DB id in the process
		$this->freshness();
	}

	return $this->_wp_id;
}
/**
 * SyndicatedPost::store() -- resolves the author and categories of this
 * item to numeric IDs, runs the `syndicated_post` filter, and then either
 * inserts the item as a new post or updates the stored copy, depending on
 * freshness(). The item is dropped ($this->post set to NULL) if the author
 * or the categories resolve to NULL, or if a filter returns NULL.
 *
 * @return mixed 'new' if a post was inserted, 'updated' if an existing post
 *	was updated, FALSE if nothing was stored
 */
function store () {
	global $wpdb;

	if ($this->filtered()) { // This should never happen.
		FeedWordPress::critical_bug('SyndicatedPost', $this, __LINE__);
	}

	$freshness = $this->freshness();
	if (!($freshness > 0)) {
		// Same old, same old: nothing to store
		return false;
	}

	# -- Look up, or create, numeric ID for author
	$author_policy = FeedWordPress::on_unfamiliar('author', $this->post['named']['unfamiliar']['author']);
	$this->post['post_author'] = $this->author_id($author_policy);
	if (is_null($this->post['post_author'])) {
		$this->post = NULL;
	}
	if ($this->filtered()) {
		return false;
	}

	# -- Look up, or create, numeric ID for categories
	list($pcats, $ptags) = $this->category_ids (
		$this->post['named']['category'],
		FeedWordPress::on_unfamiliar('category', $this->post['named']['unfamiliar']['category']),
		/*tags_too=*/ true
	);

	$this->post['post_category'] = $pcats;
	$this->post['tags_input'] = array_merge($this->post['tags_input'], $ptags);

	if (is_null($this->post['post_category'])) {
		// filter mode on, no matching categories; drop the post
		$this->post = NULL;
	} else {
		// filter mode off or at least one match; now add on the feed and global presets
		$preset_ids = $this->category_ids (
			$this->post['named']['preset/category'],
			'default'
		);
		$this->post['post_category'] = array_merge($this->post['post_category'], $preset_ids);

		if (count($this->post['post_category']) < 1) {
			// Default to category 1 ("Uncategorized" / "General") if nothing else
			$this->post['post_category'][] = 1;
		}
	}
	if ($this->filtered()) {
		return false;
	}

	// The name-based data has served its purpose; give filters a last say.
	unset($this->post['named']);
	$this->post = apply_filters('syndicated_post', $this->post, $this);
	if ($this->filtered()) {
		return false;
	}

	if ($freshness == 2) {
		// The item has not yet been added. So let's add it.
		$this->insert_new();
		$this->add_rss_meta();
		do_action('post_syndicated_item', $this->wp_id());

		return 'new';
	}

	if ($freshness == 1) {
		// The item is already stored, but has changed since.
		$this->post['ID'] = $this->wp_id();
		$this->update_existing();
		$this->add_rss_meta();
		do_action('update_syndicated_item', $this->wp_id());

		return 'updated';
	}

	return false;
} // function SyndicatedPost::store ()
/**
 * SyndicatedPost::insert_new() -- inserts this item into the database as a
 * new post and records the new post's ID in $this->_wp_id. Goes through
 * wp_insert_post() when use_api() allows it, and through a direct INSERT
 * query otherwise. Does nothing if normalize_post() returns NULL.
 */
function insert_new () {
	global $wpdb, $wp_db_version;

	$dbpost = $this->normalize_post(/*new=*/ true);
	if (is_null($dbpost)) {
		return;
	}

	if ($this->use_api('wp_insert_post')) {
		$dbpost['post_pingback'] = false; // Tell WP 2.1 and 2.2 not to process for pingbacks

		// Kludge #1: WordPress 2.6 overwrites authorship meta-data on
		// stored revisions, so repair it as each revision is written.
		add_action('_wp_put_post_revision', array($this, 'fix_revision_meta'));

		// Kludge #2: prevent kses filters from stripping the content of
		// posts when inserting without a logged in user who has the
		// `unfiltered_html` capability.
		add_filter('content_save_pre', array($this, 'avoid_kses_munge'), 11);

		$this->_wp_id = wp_insert_post($dbpost);

		// Turn kludges #1 and #2 back off
		remove_action('_wp_put_post_revision', array($this, 'fix_revision_meta'));
		remove_filter('content_save_pre', array($this, 'avoid_kses_munge'), 11);

		$this->validate_post_id($dbpost, array(__CLASS__, __FUNCTION__));

		// As of WordPress 2.3, wp_insert_post() still offers no way to
		// use a guid of your choice, and overwrites the post modified
		// timestamp, too; so put both back by hand.
		$wpdb->query("
		UPDATE $wpdb->posts
		SET
			guid='{$dbpost['guid']}',
			post_modified='{$dbpost['post_modified']}',
			post_modified_gmt='{$dbpost['post_modified_gmt']}'
		WHERE ID='{$this->_wp_id}'
		");
	} else {
		# Going through wp_insert_post() is the right way to do this.
		# But in earlier versions of WordPress it has too much behavior
		# (mainly related to pings) that can't be overridden. In
		# WordPress 1.5, it's enough of a resource hog to make PHP
		# segfault after inserting 50-100 posts, which is especially
		# annoying on the first update of a feed.
		$excerpt_clause = (isset($dbpost['post_excerpt']) ? "post_excerpt = '{$dbpost['post_excerpt']}'," : "");

		$wpdb->query("
		INSERT INTO $wpdb->posts
		SET
			guid = '{$dbpost['guid']}',
			post_author = '{$dbpost['post_author']}',
			post_date = '{$dbpost['post_date']}',
			post_date_gmt = '{$dbpost['post_date_gmt']}',
			post_content = '{$dbpost['post_content']}',"
			.$excerpt_clause."
			post_title = '{$dbpost['post_title']}',
			post_name = '{$dbpost['post_name']}',
			post_modified = '{$dbpost['post_modified']}',
			post_modified_gmt = '{$dbpost['post_modified_gmt']}',
			comment_status = '{$dbpost['comment_status']}',
			ping_status = '{$dbpost['ping_status']}',
			post_status = '{$dbpost['post_status']}'
		");
		$this->_wp_id = $wpdb->insert_id;

		$this->validate_post_id($dbpost, array(__CLASS__, __FUNCTION__));

		// WordPress 1.5.x - 2.0.x
		wp_set_post_cats('1', $this->wp_id(), $this->post['post_category']);

		// Since we are not going through official channels, we need to
		// manually tell WordPress that we've published a new post, so
		// that FeedWordPress plays well with the staticize-reloaded
		// plugin (something that a large aggregator website is going
		// to *want* to be able to use).
		do_action('publish_post', $this->_wp_id);
	}
} /* SyndicatedPost::insert_new() */
/**
 * SyndicatedPost::update_existing() -- writes the current state of this
 * item over the copy already stored in the database. Goes through
 * wp_insert_post() when use_api() allows it, and through a direct UPDATE
 * query (matching on guid) otherwise. Does nothing if normalize_post()
 * returns NULL.
 */
function update_existing () {
	global $wpdb;

	$dbpost = $this->normalize_post(/*new=*/ false);
	if (!is_null($dbpost)) :
		if ($this->use_api('wp_insert_post')) :
			$dbpost['post_pingback'] = false; // Tell WP 2.1 and 2.2 not to process for pingbacks

			// Kludge #1: WordPress 2.6 overwrites authorship meta-data
			// on stored revisions, so repair it as each one is written.
			add_action('_wp_put_post_revision', array($this, 'fix_revision_meta'));

			// Kludge #2: prevent kses filters from stripping the
			// content of posts when updating without a logged in
			// user who has `unfiltered_html` capability.
			add_filter('content_save_pre', array($this, 'avoid_kses_munge'), 11);

			// Don't overwrite status fields that the user may have reset manually
			if (function_exists('get_post_field')) :
				$doNotMunge = array('post_status', 'comment_status', 'ping_status');
				foreach ($doNotMunge as $field) :
					$dbpost[$field] = get_post_field($field, $this->wp_id());
				endforeach;
			endif;

			$this->_wp_id = wp_insert_post($dbpost);

			// Turn kludges #1 and #2 back off
			remove_action('_wp_put_post_revision', array($this, 'fix_revision_meta'));
			remove_filter('content_save_pre', array($this, 'avoid_kses_munge'), 11);

			$this->validate_post_id($dbpost, array(__CLASS__, __FUNCTION__));

			// As of WordPress 2.3, wp_insert_post() overwrites the post
			// modified timestamp; so put it back by hand.
			$result = $wpdb->query("
			UPDATE $wpdb->posts
			SET
				post_modified='{$dbpost['post_modified']}',
				post_modified_gmt='{$dbpost['post_modified_gmt']}'
			WHERE ID='{$this->_wp_id}'
			");
		else :

			$result = $wpdb->query("
			UPDATE $wpdb->posts
			SET
				post_author = '{$dbpost['post_author']}',
				post_content = '{$dbpost['post_content']}',"
				.(isset($dbpost['post_excerpt']) ? "post_excerpt = '{$dbpost['post_excerpt']}'," : "")."
				post_title = '{$dbpost['post_title']}',
				post_name = '{$dbpost['post_name']}',
				post_modified = '{$dbpost['post_modified']}',
				post_modified_gmt = '{$dbpost['post_modified_gmt']}'
			WHERE guid='{$dbpost['guid']}'
			");

			// WordPress 2.1.x and up
			if (function_exists('wp_set_post_categories')) :
				wp_set_post_categories($this->wp_id(), $this->post['post_category']);
			// WordPress 1.5.x - 2.0.x
			elseif (function_exists('wp_set_post_cats')) :
				wp_set_post_cats('1', $this->wp_id(), $this->post['post_category']);
			// This should never happen.
			else :
				// __FUNCTION__ is the magic constant; the bare word
				// __FUNCTION is an undefined constant.
				FeedWordPress::critical_bug(__CLASS__.'::'.__FUNCTION__.'(): no post categorizing function', array("dbpost" => $dbpost, "this" => $this), __LINE__);
			endif;

			// Since we are not going through official channels, we need to
			// manually tell WordPress that a post has been edited, so that
			// FeedWordPress plays well with the staticize-reloaded plugin
			// (something that a large aggregator website is going to
			// *want* to be able to use).
			do_action('edit_post', $this->post['ID']);
		endif;
	endif;
} /* SyndicatedPost::update_existing() */
/**
 * SyndicatedPost::normalize_post() -- builds a copy of $this->post in
 * which every top-level string value has been passed through
 * $wpdb->escape(), and in which a blank title has been replaced by a
 * generated one (source title plus local publication time).
 *
 * @param bool $new If true, this post is to be inserted anew. If false, it is an update of an existing post. (Currently unused.)
 * @return array A normalized representation of the post ready to be inserted into the database or sent to the WordPress API functions; NULL if the post has been filtered out and there is nothing to normalize
 */
function normalize_post ($new = true) {
	global $wpdb;

	// Nothing to normalize for an item that has been dropped. Callers
	// already test the return value with is_null().
	if (!is_array($this->post)) :
		return NULL;
	endif;

	$out = array();

	// Escaping is deliberately deferred until this point, the point of
	// insertion, so that filters always see unescaped data.
	foreach ($this->post as $key => $value) :
		if (is_string($value)) :
			$out[$key] = $wpdb->escape($value);
		else :
			$out[$key] = $value;
		endif;
	endforeach;

	// FIXME: Option for filtering out empty posts (blank title, content, and excerpt)

	if (!isset($out['post_title']) or strlen($out['post_title'])==0) :
		// See the note on the (int) cast: it must apply to the product so
		// that fractional GMT offsets (e.g. 5.5) are not truncated.
		$offset = (int) round(((float) get_option('gmt_offset')) * 60 * 60);

		$source = (isset($this->post['meta']['syndication_source']) ? $this->post['meta']['syndication_source'] : '');

		// The source title comes straight off the feed, so it needs the
		// same escaping as every other string in the normalized post.
		$out['post_title'] = $wpdb->escape(
			$source
			.' '.gmdate('Y-m-d H:i:s', $this->published() + $offset)
		);
		// FIXME: Option for what to fill a blank title with...
	endif;

	return $out;
}
/**
 * SyndicatedPost::validate_post_id() -- sanity check on $this->_wp_id
 * after an attempted insert or update. Reports a critical bug if the ID
 * is non-numeric or zero.
 *
 * @param array $dbpost An array representing the post we attempted to insert or update
 * @param mixed $ns A string or array representing the namespace (class, method) whence this method was called.
 */
function validate_post_id ($dbpost, $ns) {
	// Normalize the namespace to a "Class::method" style string
	$ns = (is_array($ns) ? implode('::', $ns) : (string) $ns);

	$id_is_usable = (is_numeric($this->_wp_id) and ($this->_wp_id != 0));
	if (!$id_is_usable) {
		// This should never happen.
		$dump = array(
			"\$this->_wp_id" => $this->_wp_id,
			"\$dbpost" => $dbpost,
			"\$this" => $this
		);
		FeedWordPress::critical_bug(/*name=*/ $ns.'::_wp_id', /*var =*/ $dump, /*line # =*/ __LINE__);
	}
} /* SyndicatedPost::validate_post_id() */
/**
 * SyndicatedPost::fix_revision_meta() - Repairs the meta-data (authorship)
 * that WP 2.6+ overwrites when storing revisions of an updated syndicated
 * post.
 *
 * A plugin that uses wp_insert_post() has no way to set certain meta-data
 * (such as the author) on the old revision that gets stored when a post is
 * updated. WordPress uses its own defaults instead (= the currently active
 * user ID if the process is running with a user logged in, or = #0 if
 * there is no user logged in). That results in bogus authorship data for
 * revisions that are syndicated from off the feed, so this method is
 * hooked to `_wp_put_post_revision` to set the author back afterwards.
 *
 * @param int $revision_id The revision ID to fix up meta-data
 */
function fix_revision_meta ($revision_id) {
	global $wpdb;

	// Both values are forced to integers before going into the query
	$post_author = (int) $this->post['post_author'];
	$revision_id = (int) $revision_id;

	$wpdb->query("
	UPDATE $wpdb->posts
	SET post_author={$post_author}
	WHERE post_type = 'revision' AND ID='$revision_id'
	");
} /* SyndicatedPost::fix_revision_meta () */
/**
 * SyndicatedPost::avoid_kses_munge() -- Filter callback that throws away
 * whatever earlier `content_save_pre` filters did to the content and
 * substitutes the escaped original from $this->post['post_content'].
 *
 * Background: when FeedWordPress runs an automatic update,
 * wp_insert_post() is generally called under the credentials of whoever is
 * viewing the blog at the time -- usually no user at all. When
 * current_user_can('unfiltered_html') is false, WordPress runs the post
 * content through a kses function that strips out lots of HTML tags --
 * notably <object> and some others. That breaks syndication of (for
 * example) feeds that contain YouTube videos, and it makes automatic
 * updates behave differently from updates started manually from the
 * WordPress Dashboard (usually under the credentials of a logged-in admin,
 * and so not run through kses). Restoring the original content for the
 * one syndicated post being saved avoids the whole problem.
 *
 * @param string $content The content of the post, after other filters have gotten to it
 * @return string The original content of the post, before other filters had a chance to alter it.
 */
function avoid_kses_munge ($content) {
	global $wpdb;

	$original_content = $this->post['post_content'];
	return $wpdb->escape($original_content);
}
// SyndicatedPost::add_rss_meta: stores meta-data about each entry in the
// post's custom fields. The keys and values to store are the keys and
// values of $post['meta']; a value may be a single item or an array of
// items, each of which is stored as a separate row. This holds anything
// that the WordPress user might want to access from a template concerning
// the post's original source that isn't provided for by standard WP
// meta-data (i.e., any interesting data about the syndicated post other
// than author, title, timestamp, categories, and guid). It's also used to
// hook into WordPress's support for enclosures.
function add_rss_meta () {
	global $wpdb;

	$has_meta = (
		is_array($this->post)
		and isset($this->post['meta'])
		and is_array($this->post['meta'])
	);
	if (!$has_meta) {
		return;
	}

	$postId = $this->wp_id();

	// Aggregated posts should NOT send out pingbacks. WordPress 2.1-2.2
	// claim you can tell them not to using $post_pingback, but they don't
	// listen, so we make sure here.
	$wpdb->query("
	DELETE FROM $wpdb->postmeta
	WHERE post_id='$postId' AND meta_key='_pingme'
	");

	foreach ($this->post['meta'] as $field => $values) {
		$key = $wpdb->escape($field);

		// If this is an update, clear out the old values to avoid
		// duplication.
		$wpdb->query("
		DELETE FROM $wpdb->postmeta
		WHERE post_id='$postId' AND meta_key='$key'
		");

		// Allow for either a single value or an array
		$rows = (is_array($values) ? $values : array($values));
		foreach ($rows as $raw) {
			$value = $wpdb->escape($raw);
			$wpdb->query("
			INSERT INTO $wpdb->postmeta
			SET
				post_id='$postId',
				meta_key='$key',
				meta_value='$value'
			");
		}
	}
} /* SyndicatedPost::add_rss_meta () */
// SyndicatedPost::author_id (): get the numeric user ID for the author
// named on the feed item. The lookup order is: (1) a per-feed author
// mapping rule, (2) an existing user matching by login, e-mail, nicename,
// or an "a.k.a." alias in the user description, (3) whatever
// $unfamiliar_author calls for: 'create' makes a new user, a numeric value
// names an existing user to fall back on, 'default' uses user #1.
// Returns NULL if the author is filtered out or cannot be resolved.
// A resolved ID is remembered in the feed's author mapping.
function author_id ($unfamiliar_author = 'create') {
	global $wpdb;

	$a = $this->author();
	$author = $a['name'];
	$email = $a['email'];
	$url = $a['uri'];

	// Matching by e-mail is on unless the option explicitly turns it off
	$match_author_by_email = !('yes' == get_option("feedwordpress_do_not_match_author_by_email"));
	if ($match_author_by_email and !FeedWordPress::is_null_email($email)) :
		$test_email = $email;
	else :
		$test_email = NULL;
	endif;

	// Never can be too careful...
	$login = sanitize_user($author, /*strict=*/ true);
	$login = apply_filters('pre_user_login', $login);

	$nice_author = sanitize_title($author);
	$nice_author = apply_filters('pre_user_nicename', $nice_author);

	// $reg_author is quoted for use inside a regular expression
	$reg_author = $wpdb->escape(preg_quote($author));
	$author = $wpdb->escape($author);
	$email = $wpdb->escape($email);
	$test_email = $wpdb->escape($test_email);
	$url = $wpdb->escape($url);

	// Check for an existing author rule....
	if (isset($this->link->settings['map authors']['name'][strtolower(trim($author))])) :
		$author_rule = $this->link->settings['map authors']['name'][strtolower(trim($author))];
	else :
		$author_rule = NULL;
	endif;

	// User name is mapped to a particular author. If that author ID exists, use it.
	if (is_numeric($author_rule) and get_userdata((int) $author_rule)) :
		$id = (int) $author_rule;

	// User name is filtered out
	elseif ('filter' == $author_rule) :
		$id = NULL;

	else :
		// Check the database for an existing author record that might fit

		#-- WordPress 2.0+
		if (fwp_test_wp_version(FWP_SCHEMA_HAS_USERMETA)) :

			// First try the user core data table.
			$id = $wpdb->get_var(
			"SELECT ID FROM $wpdb->users
			WHERE
				TRIM(LCASE(user_login)) = TRIM(LCASE('$login'))
				OR (
					LENGTH(TRIM(LCASE(user_email))) > 0
					AND TRIM(LCASE(user_email)) = TRIM(LCASE('$test_email'))
				)
				OR TRIM(LCASE(user_nicename)) = TRIM(LCASE('$nice_author'))
			");

			// If that fails, look for aliases in the user meta data table
			if (is_null($id)) :
				$id = $wpdb->get_var(
				"SELECT user_id FROM $wpdb->usermeta
				WHERE
					(meta_key = 'description' AND TRIM(LCASE(meta_value)) = TRIM(LCASE('$author')))
					OR (
						meta_key = 'description'
						AND TRIM(LCASE(meta_value))
						RLIKE CONCAT(
							'(^|\\n)a\\.?k\\.?a\\.?( |\\t)*:?( |\\t)*',
							TRIM(LCASE('$reg_author')),
							'( |\\t|\\r)*(\\n|\$)'
						)
					)
				");
			endif;

		#-- WordPress 1.5.x
		else :
			$id = $wpdb->get_var(
			"SELECT ID from $wpdb->users
			WHERE
				TRIM(LCASE(user_login)) = TRIM(LCASE('$login')) OR
				(
					LENGTH(TRIM(LCASE(user_email))) > 0
					AND TRIM(LCASE(user_email)) = TRIM(LCASE('$test_email'))
				) OR
				TRIM(LCASE(user_firstname)) = TRIM(LCASE('$author')) OR
				TRIM(LCASE(user_nickname)) = TRIM(LCASE('$author')) OR
				TRIM(LCASE(user_nicename)) = TRIM(LCASE('$nice_author')) OR
				TRIM(LCASE(user_description)) = TRIM(LCASE('$author')) OR
				(
					LOWER(user_description)
					RLIKE CONCAT(
						'(^|\\n)a\\.?k\\.?a\\.?( |\\t)*:?( |\\t)*',
						LCASE('$reg_author'),
						'( |\\t|\\r)*(\\n|\$)'
					)
				)
			");

		endif;

		// ... if you don't find one, then do what you need to do
		if (is_null($id)) :
			if ($unfamiliar_author === 'create') :
				$userdata = array();

				#-- user table data
				$userdata['ID'] = NULL; // new user
				$userdata['user_login'] = $login;
				$userdata['user_nicename'] = $nice_author;
				$userdata['user_pass'] = substr(md5(uniqid(microtime())), 0, 6); // just something random to lock it up
				$userdata['user_email'] = $email;
				$userdata['user_url'] = $url;
				$userdata['display_name'] = $author;

				$id = wp_insert_user($userdata);

				// On WordPress versions that have WP_Error, a failed
				// insertion comes back as an error object, which must
				// not be mistaken for a user ID.
				if (function_exists('is_wp_error') and is_wp_error($id)) :
					FeedWordPress::noncritical_bug('user insertion problem', array('userdata' => $userdata, 'id' => $id, 'this' => $this), __LINE__);
					$id = NULL;
				endif;
			elseif (is_numeric($unfamiliar_author) and get_userdata((int) $unfamiliar_author)) :
				$id = (int) $unfamiliar_author;
			elseif ($unfamiliar_author === 'default') :
				$id = 1;
			endif;
		endif;
	endif;

	// Remember the match so that the next item by this author hits the
	// author rule above.
	if ($id) :
		$this->link->settings['map authors']['name'][strtolower(trim($author))] = $id;
	endif;
	return $id;
} // function SyndicatedPost::author_id ()
// Look up (and, when permitted, create) category IDs for a list of category
// names taken from a syndicated item.
//
// $cats: array of category names. An element of the form `{#N}` is treated
//   as a direct reference to the existing category whose numeric ID is N.
// $unfamiliar_category: what to do with a name that matches neither an
//   existing category nor an "a.k.a." alias line in a category description:
//     'create' - insert a new category and use its ID
//     'tag'    - collect the name as a tag instead (WordPress 2.3+ branch only)
//     'filter' - create nothing; if no category matched at all, NULL is
//                returned in place of the ID list (signal to drop the post)
//   Any other value simply skips the unfamiliar name.
// $tags_too: when true, return array($cat_ids, $tags) rather than $cat_ids.
//
// Returns the de-duplicated list of category IDs (or NULL, see 'filter'),
// optionally paired with the list of collected tag names.
function category_ids ($cats, $unfamiliar_category = 'create', $tags_too = false) {
	global $wpdb;

	// We need to normalize whitespace because (1) trailing whitespace can
	// cause PHP and MySQL not to see eye to eye on VARCHAR comparisons for
	// some versions of MySQL, and (2) hardly anybody wants a semantic
	// distinction between 'Computers' and 'Computers '.
	$cats = array_map('trim', $cats);

	$tags = array();

	$cat_ids = array ();
	foreach ($cats as $cat_name) :
		if (preg_match('/^{#([0-9]+)}$/', $cat_name, $backref)) :
			// Explicit numeric reference: accept it only if the
			// category really exists.
			$cat_id = (int) $backref[1];
			if (function_exists('is_term') and is_term($cat_id, 'category')) :
				$cat_ids[] = $cat_id;
			elseif (get_category($cat_id)) :
				$cat_ids[] = $cat_id;
			endif;
		elseif (strlen($cat_name) > 0) :
			$esc = $wpdb->escape($cat_name);
			// Quoted for use inside the RLIKE pattern, then escaped
			// for use inside the SQL string literal.
			$resc = $wpdb->escape(preg_quote($cat_name));

			// WordPress 2.3+
			if (function_exists('is_term')) :
				$cat_id = is_term($cat_name, 'category');
				if ($cat_id) :
					$cat_ids[] = $cat_id['term_id'];
				// There must be a better way to do this...
				// (match "a.k.a.: <name>" lines in term descriptions)
				elseif ($results = $wpdb->get_results(
					"SELECT term_id
					FROM $wpdb->term_taxonomy
					WHERE
						LOWER(description) RLIKE
						CONCAT('(^|\\n)a\\.?k\\.?a\\.?( |\\t)*:?( |\\t)*', LOWER('{$resc}'), '( |\\t|\\r)*(\\n|\$)')"
				)) :
					foreach ($results AS $term) :
						$cat_ids[] = (int) $term->term_id;
					endforeach;
				elseif ('tag'===$unfamiliar_category) :
					$tags[] = $cat_name;
				elseif ('create'===$unfamiliar_category) :
					$term = wp_insert_term($cat_name, 'category');
					if (is_wp_error($term)) :
						FeedWordPress::noncritical_bug('term insertion problem', array('cat_name' => $cat_name, 'term' => $term, 'this' => $this), __LINE__);
					else :
						$cat_ids[] = $term['term_id'];
					endif;
				endif;

			// WordPress 1.5.x - 2.2.x
			else :
				$results = $wpdb->get_results(
					"SELECT cat_ID
					FROM $wpdb->categories
					WHERE
						(LOWER(cat_name) = LOWER('$esc'))
						OR (LOWER(category_description)
						RLIKE CONCAT('(^|\\n)a\\.?k\\.?a\\.?( |\\t)*:?( |\\t)*', LOWER('{$resc}'), '( |\\t|\\r)*(\\n|\$)'))
					");
				if ($results) :
					foreach ($results as $term) :
						$cat_ids[] = (int) $term->cat_ID;
					endforeach;
				elseif ('create'===$unfamiliar_category) :
					if (function_exists('wp_insert_category')) :
						$cat_id = wp_insert_category(array('cat_name' => $cat_name));
					// And into the database we go.
					else :
						$nice_kitty = sanitize_title($cat_name);
						$wpdb->query(sprintf("
							INSERT INTO $wpdb->categories
							SET
								cat_name='%s',
								category_nicename='%s'
							", $wpdb->escape($cat_name), $nice_kitty
						));
						$cat_id = $wpdb->insert_id;
					endif;
					$cat_ids[] = $cat_id;
				endif;
			endif;
		endif;
	endforeach;

	if ((count($cat_ids) == 0) and ($unfamiliar_category === 'filter')) :
		$cat_ids = NULL; // Drop the post
	else :
		$cat_ids = array_unique($cat_ids);
	endif;

	if ($tags_too) : $ret = array($cat_ids, $tags);
	else : $ret = $cat_ids;
	endif;

	return $ret;
} // function SyndicatedPost::category_ids ()
// Report whether a given WordPress API feature is safe to use with the
// database schema version of the running WordPress install.
//
// $tag: feature name; the recognized values are 'wp_insert_post' and
//   'post_status_pending'.
//
// Returns true if the global $wp_db_version is set and newer than the schema
// constant for that feature, false otherwise. An unrecognized $tag also
// yields false, rather than reading a variable that was never assigned.
function use_api ($tag) {
	global $wp_db_version;

	// Safe answer for any feature we know nothing about.
	$ret = false;

	switch ($tag) :
	case 'wp_insert_post':
		// Before 2.2, wp_insert_post does too much of the wrong stuff to use it
		// In 1.5 it was such a resource hog it would make PHP segfault on big updates
		$ret = (isset($wp_db_version) and $wp_db_version > FWP_SCHEMA_21);
		break;
	case 'post_status_pending':
		$ret = (isset($wp_db_version) and $wp_db_version > FWP_SCHEMA_23);
		break;
	endswitch;
	return $ret;
} // function SyndicatedPost::use_api ()
// Find the creation date of the item, if the feed supplied one.
//
// Looks, in order, at dc:created, dcterms:created and the bare `created`
// element (Atom 0.3). The first one present is handed to parse_w3cdtf() and
// whatever that returns is passed back. Returns NULL when none is present.
function created () {
	$raw = null;

	if (isset($this->item['dc']['created'])) {
		$raw = $this->item['dc']['created'];
	} elseif (isset($this->item['dcterms']['created'])) {
		$raw = $this->item['dcterms']['created'];
	} elseif (isset($this->item['created'])) { // Atom 0.3
		$raw = $this->item['created'];
	}

	if (is_null($raw)) {
		return null;
	}
	return @parse_w3cdtf($raw);
}
// Work out when the item was published, as a Unix timestamp.
//
// $fallback: if true and no publication date is present, ask updated() for a
//   last-modified date instead (with its own fallback switched off, so the
//   two methods cannot recurse into each other forever).
// $default: value to return when no date can be found at all; the sentinel
//   -1 means "use the current time". This parameter was previously missing
//   even though the body read it and updated() already passes it, so the
//   defaulting step returned NULL instead of a timestamp.
//
// Returns the timestamp produced by the first date element found, or the
// default described above.
function published ($fallback = true, $default = -1) {
	$epoch = null;

	# RSS is a mess. Figure out whether we have a date in <dc:date>,
	# <issued>, <pubDate>, etc., and get it into Unix epoch format for
	# reformatting. If we can't find anything, we'll use the last-updated
	# time.
	if (isset($this->item['dc']['date'])): // Dublin Core
		$epoch = @parse_w3cdtf($this->item['dc']['date']);
	elseif (isset($this->item['dcterms']['issued'])) : // Dublin Core extensions
		$epoch = @parse_w3cdtf($this->item['dcterms']['issued']);
	elseif (isset($this->item['published'])) : // Atom 1.0
		$epoch = @parse_w3cdtf($this->item['published']);
	elseif (isset($this->item['issued'])): // Atom 0.3
		$epoch = @parse_w3cdtf($this->item['issued']);
	elseif (isset($this->item['pubdate'])): // RSS 2.0
		$epoch = strtotime($this->item['pubdate']);
		// strtotime() reports an unparseable date as false; treat that
		// as "no date" so the default below applies.
		if (false === $epoch) :
			$epoch = null;
		endif;
	elseif ($fallback) : // Fall back to <updated> / <modified> if present
		$epoch = $this->updated(/*fallback=*/ false, /*default=*/ $default);
	endif;

	# If everything failed, then default to the current time.
	if (is_null($epoch)) :
		if (-1 == $default) :
			$epoch = time();
		else :
			$epoch = $default;
		endif;
	endif;

	return $epoch;
}
// Work out when the item was last modified, as a Unix timestamp.
//
// $fallback: if true and the item carries no modification date, use the
//   publication date from published() instead (called with its own fallback
//   disabled).
// $default: value to return when nothing could be found; -1 stands for
//   "use the current time".
function updated ($fallback = true, $default = -1) {
	# As far as I know, only dcterms and Atom have reliable ways to
	# specify when something was *modified* last. Pick the first of
	# those elements that the item actually has.
	$raw = null;
	if (isset($this->item['dc']['modified'])) { // Not really correct
		$raw = $this->item['dc']['modified'];
	} elseif (isset($this->item['dcterms']['modified'])) { // Dublin Core extensions
		$raw = $this->item['dcterms']['modified'];
	} elseif (isset($this->item['modified'])) { // Atom 0.3
		$raw = $this->item['modified'];
	} elseif (isset($this->item['updated'])) { // Atom 1.0
		$raw = $this->item['updated'];
	}

	$stamp = null;
	if (!is_null($raw)) {
		$stamp = @parse_w3cdtf($raw);
	} elseif ($fallback) {
		// Neither is available: try the time of publication.
		$stamp = $this->published(/*fallback=*/ false, /*default=*/ $default);
	}

	# Still nothing? Then hand back the default (current time for -1).
	if (is_null($stamp)) {
		$stamp = (-1 == $default) ? time() : $default;
	}

	return $stamp;
}
// Fingerprint of the item's current content: the MD5 digest of the
// serialized item array.
function update_hash () {
	$serialized = serialize($this->item);
	return md5($serialized);
}
// Return a unique identifier for the item.
//
// Uses, in order of preference, the Atom id, the namespaced Atom id, the
// RSS 2.0 guid, or dc:identifier. If the feed supplies none of those, a
// `tag:` URI is cobbled together from the feed's host name plus either the
// item's creation date or an MD5 of its link and title.
//
// The synthesized identifiers are string-for-string the same as before; the
// only changes are that missing array elements are read as empty strings
// explicitly rather than through undefined-index notices, and that the
// creation date is parsed once instead of twice.
function guid () {
	$guid = null;
	if (isset($this->item['id'])): // Atom 0.3 / 1.0
		$guid = $this->item['id'];
	elseif (isset($this->item['atom']['id'])) : // Namespaced Atom
		$guid = $this->item['atom']['id'];
	elseif (isset($this->item['guid'])) : // RSS 2.0
		$guid = $this->item['guid'];
	elseif (isset($this->item['dc']['identifier'])) :// yeah, right
		$guid = $this->item['dc']['identifier'];
	else :
		// The feed does not seem to have provided us with a
		// unique identifier, so we'll have to cobble together
		// a tag: URI that might work for us. The base of the
		// URI will be the host name of the feed source ...
		$feed_uri = (isset($this->feedmeta['link/uri']) ? $this->feedmeta['link/uri'] : '');
		$bits = parse_url($feed_uri);
		$host = ((is_array($bits) and isset($bits['host'])) ? $bits['host'] : '');
		$guid = 'tag:'.$host;

		// If we have a date of creation, then we can use that
		// to uniquely identify the item. (On the other hand, if
		// the feed producer was conscientious enough to
		// generate dates of creation, she probably also was
		// conscientious enough to generate unique identifiers.)
		$created = $this->created();
		if (!is_null($created)) :
			$guid .= '://post.'.date('YmdHis', $created);

		// Otherwise, use both the URI of the item, *and* the
		// item's title. We have to use both because titles are
		// often not unique, and sometimes links aren't unique
		// either (e.g. Mozilla Dot Org news, some podcasts).
		// But it's rare to have *both* the same title *and* the
		// same link for two different items. So this is about
		// the best we can do.
		else :
			$link = (isset($this->item['link']) ? $this->item['link'] : '');
			$title = (isset($this->item['title']) ? $this->item['title'] : '');
			$guid .= '://'.md5($link.'/'.$title);
		endif;
	endif;
	return $guid;
}
// Collect what the feed tells us about the author of the item.
//
// Returns an array with a 'name' element, a 'uri' element and, when one can
// be found, an 'email' element. Item-level data is preferred over
// channel-level data; the channel title and channel link serve as last
// resorts for the name and the URI respectively.
function author () {
	$author = array ();

	// -- Name --
	if (isset($this->item['author_name'])):
		$author['name'] = $this->item['author_name'];
	elseif (isset($this->item['dc']['creator'])):
		$author['name'] = $this->item['dc']['creator'];
	elseif (isset($this->item['dc']['contributor'])):
		$author['name'] = $this->item['dc']['contributor'];
	elseif (isset($this->feed->channel['dc']['creator'])) :
		$author['name'] = $this->feed->channel['dc']['creator'];
	elseif (isset($this->feed->channel['dc']['contributor'])) :
		$author['name'] = $this->feed->channel['dc']['contributor'];
	elseif (isset($this->feed->channel['author_name'])) :
		$author['name'] = $this->feed->channel['author_name'];
	elseif ($this->feed->is_rss() and isset($this->item['author'])) :
		// The author element in RSS is allegedly an
		// e-mail address, but lots of people don't use
		// it that way. So let's make of it what we can.
		// NOTE(review): presumably this returns an array that may hold
		// 'name' and/or 'email' -- confirm against the helper.
		$author = parse_email_with_realname($this->item['author']);

		if (!isset($author['name'])) :
			if (isset($author['email'])) :
				$author['name'] = $author['email'];
			else :
				$author['name'] = $this->feed->channel['title'];
			endif;
		endif;
	else :
		$author['name'] = $this->feed->channel['title'];
	endif;

	// -- E-mail --
	if (isset($this->item['author_email'])):
		$author['email'] = $this->item['author_email'];
	elseif (isset($this->feed->channel['author_email'])) :
		$author['email'] = $this->feed->channel['author_email'];
	endif;

	// -- URI --
	if (isset($this->item['author_url'])):
		$author['uri'] = $this->item['author_url'];
	elseif (isset($this->feed->channel['author_url'])) :
		// Take the channel-level value here: in this branch the
		// item-level one is known to be unset.
		$author['uri'] = $this->feed->channel['author_url'];
	else:
		$author['uri'] = $this->feed->channel['link'];
	endif;

	return $author;
} // SyndicatedPost::author()
// (element, attribute) pairs whose values are URIs. resolve_relative_uris()
// walks this list in order and resolves each matching attribute value
// against the item's link.
var $uri_attrs = array (
	array('a', 'href'),
	array('applet', 'codebase'),
	array('area', 'href'),
	array('blockquote', 'cite'),
	array('body', 'background'),
	array('del', 'cite'),
	array('form', 'action'),
	array('frame', 'longdesc'),      array('frame', 'src'),
	array('iframe', 'longdesc'),     array('iframe', 'src'),
	array('head', 'profile'),
	array('img', 'longdesc'),        array('img', 'src'),
	array('img', 'usemap'),
	array('input', 'src'),           array('input', 'usemap'),
	array('ins', 'cite'),
	array('link', 'href'),
	array('object', 'classid'),      array('object', 'codebase'),
	array('object', 'data'),         array('object', 'usemap'),
	array('q', 'cite'),
	array('script', 'src')
); /* var SyndicatedPost::$uri_attrs */
// Base URI against which relative URIs are resolved; set by
// resolve_relative_uris() before the callback below is used.
var $_base = null;

// preg_replace_callback() callback: takes the match groups for one
// URI-valued attribute and returns the same markup with the attribute value
// resolved against $this->_base.
function resolve_single_relative_uri ($refs) {
	$match = FeedWordPressHTML::attributeMatch($refs);
	$resolved = Relative_URI::resolve($match['value'], $this->_base);

	$markup = $match['prefix'];
	$markup .= $resolved;
	$markup .= $match['suffix'];
	return $markup;
} /* function SyndicatedPost::resolve_single_relative_uri() */
// Resolve any relative URIs left in the content against the item's link.
//
// $content: the HTML content to process.
// $obj: the object providing `item`, `uri_attrs` and the
//   resolve_single_relative_uri() callback; its `_base` is overwritten.
//
// Returns the content with every attribute listed in $obj->uri_attrs run
// through the callback. If PCRE fails on one pattern
// (preg_replace_callback() returns NULL), that pass is skipped and the
// content so far is kept, instead of the whole content being replaced
// by NULL.
function resolve_relative_uris ($content, $obj) {
	# The MagpieRSS upgrade has some `xml:base` support baked in.
	# However, sometimes people do silly things, like putting
	# relative URIs out on a production RSS 2.0 feed or other feeds
	# with no good support for `xml:base`. So we'll do our best to
	# try to catch any remaining relative URIs and resolve them as
	# best we can.

	// Reset the base for resolving relative URIs
	$obj->_base = (isset($obj->item['link']) ? $obj->item['link'] : null);

	foreach ($obj->uri_attrs as $pair) :
		list($tag, $attr) = $pair;
		$pattern = FeedWordPressHTML::attributeRegex($tag, $attr);
		$resolved = preg_replace_callback (
			$pattern,
			array(&$obj, 'resolve_single_relative_uri'),
			$content
		);

		// NULL signals a PCRE error; keep what we had.
		if (!is_null($resolved)) :
			$content = $resolved;
		endif;
	endforeach;

	return $content;
} /* function SyndicatedPost::resolve_relative_uris () */
// (element pattern, attribute pattern) pairs that sanitize_content() removes
// from syndicated markup. Only `target` is stripped at present; the
// entries for `style` and `on*` event handlers are switched off:
//	array('[a-z]+', 'style'),
//	array('[a-z]+', 'on[a-z]+'),
var $strip_attrs = array (array('[a-z]+', 'target'));
// preg_replace_callback() callback: rebuilds a matched tag from the text
// before and after the matched attribute, leaving the attribute out.
function strip_attribute_from_tag ($refs) {
	$parts = FeedWordPressHTML::attributeMatch($refs);
	$head = $parts['before_attribute'];
	$tail = $parts['after_attribute'];
	return $head.$tail;
}
// Strip unwanted attributes from syndicated content.
//
// $content: the HTML content to clean up.
// $obj: the object providing `strip_attrs` and the
//   strip_attribute_from_tag() callback.
//
// Returns the content with every attribute listed in $obj->strip_attrs
// removed. If PCRE fails on one pattern (preg_replace_callback() returns
// NULL), that pass is skipped and the content so far is kept, instead of
// the whole content being replaced by NULL.
function sanitize_content ($content, $obj) {
	# This kind of sucks. I intend to replace it with
	# lib_filter sometime soon.
	foreach ($obj->strip_attrs as $pair):
		list($tag,$attr) = $pair;
		$pattern = FeedWordPressHTML::attributeRegex($tag, $attr);

		$stripped = preg_replace_callback (
			$pattern,
			array(&$obj, 'strip_attribute_from_tag'),
			$content
		);

		// NULL signals a PCRE error; keep what we had.
		if (!is_null($stripped)) :
			$content = $stripped;
		endif;
	endforeach;
	return $content;
}
1193} // class SyndicatedPost
Note: See TracBrowser for help on using the repository browser.