source: trunk/www.guidonia.net/wp/wp-content/plugins/feedwordpress/syndicatedpost.class.php@ 44

Last change on this file since 44 was 44, checked in by luciano, 14 years ago
File size: 42.4 KB
Line 
1<?php
2class SyndicatedPost {
3 var $item = null;
4
5 var $link = null;
6 var $feed = null;
7 var $feedmeta = null;
8
9 var $post = array ();
10
11 var $_freshness = null;
12 var $_wp_id = null;
13
/**
 * SyndicatedPost constructor: translates one feed item into the
 * `$this->post` array that the rest of the class stores in WordPress.
 *
 * The item is first passed through the `syndicated_item` filter; if a
 * filter returns NULL, `$this->post` is set to NULL (see filtered()) and
 * nothing else is computed.
 *
 * Nothing is run through $wpdb->escape() here. Escaping is done at the
 * point of insertion, to avoid double-escaping and to avoid interfering
 * with `syndicated_post` filters.
 *
 * @param array $item The parsed feed item, indexed by element name (e.g. 'title', 'description').
 * @param object $link The feed's link object; this method reads its `settings` and `magpie` members and calls its `syndicated_status()` method.
 */
function SyndicatedPost ($item, $link) {
	$this->link = $link;
	$feedmeta = $link->settings;
	$feed = $link->magpie;

	# This is ugly. I'd like to use apply_filters()'s alleged
	# support for a variable argument count, but this seems to have
	# been broken in WordPress 1.5. It'll be fixed somehow in
	# WP 1.5.1, but I'm aiming at WP 1.5 compatibility across the
	# board here.
	#
	# Cf.: <http://mosquito.wordpress.org/view.php?id=901>
	global $fwp_channel, $fwp_feedmeta;
	$fwp_channel = $feed; $fwp_feedmeta = $feedmeta;

	$this->feed = $feed;
	$this->feedmeta = $feedmeta;

	$this->item = $item;
	$this->item = apply_filters('syndicated_item', $this->item, $this);

	# Filters can halt further processing by returning NULL
	if (is_null($this->item)) :
		$this->post = NULL;
	else :
		$this->post['post_title'] = apply_filters('syndicated_item_title', $this->item['title'], $this);

		// This just gives us an alphanumeric representation of
		// the author. The numeric ID for the author is looked up
		// (or created) later, in SyndicatedPost::store().
		$this->post['named']['author'] = apply_filters('syndicated_item_author', $this->author(), $this);

		# Identify content and sanitize it.
		# ---------------------------------
		if (isset($this->item['atom_content'])) :
			$content = $this->item['atom_content'];
		elseif (isset($this->item['xhtml']['body'])) :
			$content = $this->item['xhtml']['body'];
		elseif (isset($this->item['xhtml']['div'])) :
			$content = $this->item['xhtml']['div'];
		elseif (isset($this->item['content']['encoded']) and $this->item['content']['encoded']):
			$content = $this->item['content']['encoded'];
		else:
			$content = $this->item['description'];
		endif;
		$this->post['post_content'] = apply_filters('syndicated_item_content', $content, $this);

		# Identify and sanitize excerpt: prefer the item's own
		# description; otherwise fall back to the tag-stripped
		# content, cut down to at most 255 bytes.
		$excerpt = NULL;
		if ( isset($this->item['description']) and $this->item['description'] ) :
			$excerpt = $this->item['description'];
		elseif ( isset($content) and $content ) :
			$excerpt = strip_tags($content);
			if (strlen($excerpt) > 255) :
				$excerpt = substr($excerpt,0,252).'...';
			endif;
		endif;
		$excerpt = apply_filters('syndicated_item_excerpt', $excerpt, $this);

		if (!is_null($excerpt)):
			$this->post['post_excerpt'] = $excerpt;
		endif;

		// This is unnecessary if we use wp_insert_post
		if (!$this->use_api('wp_insert_post')) :
			$this->post['post_name'] = sanitize_title($this->post['post_title']);
		endif;

		$this->post['epoch']['issued'] = apply_filters('syndicated_item_published', $this->published(), $this);
		$this->post['epoch']['created'] = apply_filters('syndicated_item_created', $this->created(), $this);
		$this->post['epoch']['modified'] = apply_filters('syndicated_item_updated', $this->updated(), $this);

		// Convert the blog's GMT offset (in hours) to seconds. The
		// multiplication must happen BEFORE the integer cast, or
		// fractional offsets such as 5.5 are truncated to whole hours.
		$offset = (int) round(((float) get_option('gmt_offset')) * 60 * 60);
		$this->post['post_date'] = gmdate('Y-m-d H:i:s', $this->published() + $offset);
		$this->post['post_modified'] = gmdate('Y-m-d H:i:s', $this->updated() + $offset);
		$this->post['post_date_gmt'] = gmdate('Y-m-d H:i:s', $this->published());
		$this->post['post_modified_gmt'] = gmdate('Y-m-d H:i:s', $this->updated());

		// Use feed-level preferences or the global default.
		$this->post['post_status'] = $this->link->syndicated_status('post', 'publish');
		$this->post['comment_status'] = $this->link->syndicated_status('comment', 'closed');
		$this->post['ping_status'] = $this->link->syndicated_status('ping', 'closed');

		// Unique ID (hopefully a unique tag: URI); failing that, the permalink
		$this->post['guid'] = apply_filters('syndicated_item_guid', $this->guid(), $this);

		// User-supplied custom settings to apply to each post. Do
		// first so that FWP-generated custom settings will overwrite
		// if necessary; thus preventing any munging.
		$default_custom_settings = get_option('feedwordpress_custom_settings');
		if ($default_custom_settings) :
			$default_custom_settings = unserialize($default_custom_settings);
		endif;
		if (!is_array($default_custom_settings)) :
			$default_custom_settings = array();
		endif;

		$custom_settings = (isset($this->link->settings['postmeta']) ? $this->link->settings['postmeta'] : null);
		if ($custom_settings) :
			$custom_settings = unserialize($custom_settings);
		endif;
		if (!is_array($custom_settings)) :
			$custom_settings = array();
		endif;
		$this->post['meta'] = array_merge($default_custom_settings, $custom_settings);

		// RSS 2.0 / Atom 1.0 enclosure support. The first enclosure
		// is stored under "enclosure@..."; later ones are read from
		// "enclosure#N@..." -- the same numbering scheme the category
		// loop below uses -- so the suffix must be built from the
		// loop counter.
		if ( isset($this->item['enclosure#']) ) :
			for ($i = 1; $i <= $this->item['enclosure#']; $i++) :
				$eid = (($i > 1) ? "#{$i}" : "");
				$this->post['meta']['enclosure'][] =
					apply_filters('syndicated_item_enclosure_url', $this->item["enclosure{$eid}@url"], $this)."\n".
					apply_filters('syndicated_item_enclosure_length', $this->item["enclosure{$eid}@length"], $this)."\n".
					apply_filters('syndicated_item_enclosure_type', $this->item["enclosure{$eid}@type"], $this);
			endfor;
		endif;

		// In case you want to point back to the blog this was syndicated from
		if (isset($this->feed->channel['title'])) :
			$this->post['meta']['syndication_source'] = apply_filters('syndicated_item_source_title', $this->feed->channel['title'], $this);
		endif;

		if (isset($this->feed->channel['link'])) :
			$this->post['meta']['syndication_source_uri'] = apply_filters('syndicated_item_source_link', $this->feed->channel['link'], $this);
		endif;

		// Make use of atom:source data, if present in an aggregated feed
		if (isset($this->item['source_title'])) :
			$this->post['meta']['syndication_source_original'] = $this->item['source_title'];
		endif;

		if (isset($this->item['source_link'])) :
			$this->post['meta']['syndication_source_uri_original'] = $this->item['source_link'];
		endif;

		if (isset($this->item['source_id'])) :
			$this->post['meta']['syndication_source_id_original'] = $this->item['source_id'];
		endif;

		// Store information on human-readable and machine-readable comment URIs
		if (isset($this->item['comments'])) :
			$this->post['meta']['rss:comments'] = apply_filters('syndicated_item_comments', $this->item['comments']);
		endif;
		if (isset($this->item['wfw']['commentrss'])) :
			$this->post['meta']['wfw:commentRSS'] = apply_filters('syndicated_item_commentrss', $this->item['wfw']['commentrss']);
		endif;

		// Store information to identify the feed that this came from
		$this->post['meta']['syndication_feed'] = $this->feedmeta['link/uri'];
		$this->post['meta']['syndication_feed_id'] = $this->feedmeta['link/id'];

		if (isset($this->item['source_link_self'])) :
			$this->post['meta']['syndication_feed_original'] = $this->item['source_link_self'];
		endif;

		// In case you want to know the external permalink...
		$this->post['meta']['syndication_permalink'] = apply_filters('syndicated_item_link', $this->item['link']);

		// Store a hash of the post content for checking whether something needs to be updated
		$this->post['meta']['syndication_item_hash'] = $this->update_hash();

		// Feed-by-feed options for author and category creation
		$this->post['named']['unfamiliar']['author'] = (isset($this->feedmeta['unfamiliar author']) ? $this->feedmeta['unfamiliar author'] : null);
		$this->post['named']['unfamiliar']['category'] = (isset($this->feedmeta['unfamiliar category']) ? $this->feedmeta['unfamiliar category'] : null);

		// Categories: start with default categories, if any
		$fc = get_option("feedwordpress_syndication_cats");
		if ($fc) :
			$this->post['named']['preset/category'] = explode("\n", $fc);
		else :
			$this->post['named']['preset/category'] = array();
		endif;

		if (isset($this->feedmeta['cats']) and is_array($this->feedmeta['cats'])) :
			$this->post['named']['preset/category'] = array_merge($this->post['named']['preset/category'], $this->feedmeta['cats']);
		endif;

		// Now add categories from the post, if we have 'em
		$this->post['named']['category'] = array();
		if ( isset($this->item['category#']) ) :
			for ($i = 1; $i <= $this->item['category#']; $i++) :
				$cat_idx = (($i > 1) ? "#{$i}" : "");
				$cat = $this->item["category{$cat_idx}"];

				if ( isset($this->feedmeta['cat_split']) and strlen($this->feedmeta['cat_split']) > 0) :
					// The feed's 'cat_split' setting is used as a
					// regular expression, delimited by BEL (\007)
					// characters, to split one category element
					// into several category names.
					$pcre = "\007".$this->feedmeta['cat_split']."\007";
					$this->post['named']['category'] = array_merge($this->post['named']['category'], preg_split($pcre, $cat, -1 /*=no limit*/, PREG_SPLIT_NO_EMPTY));
				else :
					$this->post['named']['category'][] = $cat;
				endif;
			endfor;
		endif;
		$this->post['named']['category'] = apply_filters('syndicated_item_categories', $this->post['named']['category'], $this);

		// Tags: start with default tags, if any
		$ft = get_option("feedwordpress_syndication_tags");
		if ($ft) :
			$this->post['tags_input'] = explode(FEEDWORDPRESS_CAT_SEPARATOR, $ft);
		else :
			$this->post['tags_input'] = array();
		endif;

		if (isset($this->feedmeta['tags']) and is_array($this->feedmeta['tags'])) :
			$this->post['tags_input'] = array_merge($this->post['tags_input'], $this->feedmeta['tags']);
		endif;

	endif;
} // SyndicatedPost::SyndicatedPost()
228
/**
 * SyndicatedPost::filtered() - reports whether this item has been
 * dropped, i.e. whether `$this->post` is NULL.
 *
 * @return bool True if `$this->post` is NULL, false otherwise.
 */
function filtered () {
	$was_dropped = ($this->post === NULL);
	return $was_dropped;
}
232
/**
 * SyndicatedPost::freshness() - determines whether this item is new,
 * updated, or unchanged relative to what is stored in the posts table.
 *
 * The result is cached in `$this->_freshness`. When a stored post with
 * the same guid is found, its database ID is recorded in `$this->_wp_id`
 * as a side effect.
 *
 * @return int 2 if no stored post has this guid (new content), 1 if a stored post exists and the item is newer or its hash changed, 0 if a stored post exists and nothing changed.
 */
function freshness () {
	global $wpdb;

	if ($this->filtered()) : // This should never happen.
		FeedWordPress::critical_bug('SyndicatedPost', $this, __LINE__);
	endif;

	if (is_null($this->_freshness)) :
		$guid = $wpdb->escape($this->guid());

		$result = $wpdb->get_row("
		SELECT id, guid, post_modified_gmt
		FROM $wpdb->posts WHERE guid='$guid'
		");

		if (!$result) :
			$this->_freshness = 2; // New content
		else:
			// get_post_custom_values() yields a non-array (NULL)
			// when the post has no such meta key, so make sure we
			// really have an array before counting it.
			$stored_update_hashes = get_post_custom_values('syndication_item_hash', $result->id);
			if (is_array($stored_update_hashes) and count($stored_update_hashes) > 0) :
				$stored_update_hash = $stored_update_hashes[0];
				$update_hash_changed = ($stored_update_hash != $this->update_hash());
			else :
				// No stored hash to compare against; rely on
				// the timestamps alone.
				$update_hash_changed = false;
			endif;

			// Break the stored "Y-m-d H:i:s" GMT timestamp into
			// its components and rebuild it as a Unix timestamp.
			preg_match('/([0-9]+)-([0-9]+)-([0-9]+) ([0-9]+):([0-9]+):([0-9]+)/', $result->post_modified_gmt, $backref);

			$last_rev_ts = gmmktime(
				(int) $backref[4], (int) $backref[5], (int) $backref[6],
				(int) $backref[2], (int) $backref[3], (int) $backref[1]
			);
			$updated_ts = $this->updated(/*fallback=*/ true, /*default=*/ NULL);
			$updated = ((
				!is_null($updated_ts)
				and ($updated_ts > $last_rev_ts)
			) or $update_hash_changed);

			// 1 = updated content; 0 = same old, same old.
			$this->_freshness = ($updated ? 1 : 0);

			// Either way, we now know which stored post this is.
			$this->_wp_id = $result->id;
		endif;
	endif;
	return $this->_freshness;
}
279
/**
 * SyndicatedPost::wp_id() - returns the WordPress database ID recorded
 * for this item in `$this->_wp_id`.
 *
 * If neither the ID nor the freshness has been determined yet,
 * freshness() is run first, since it records the ID of a matching
 * stored post as a side effect.
 *
 * @return mixed The value of `$this->_wp_id`; NULL if no ID is known.
 */
function wp_id () {
	if ($this->filtered()) {
		// This should never happen.
		FeedWordPress::critical_bug('SyndicatedPost', $this, __LINE__);
	}

	$nothing_known_yet = (is_null($this->_wp_id) && is_null($this->_freshness));
	if ($nothing_known_yet) {
		// Called for its side effect of setting $this->_wp_id.
		$this->freshness();
	}

	return $this->_wp_id;
}
290
/**
 * SyndicatedPost::store() - resolves the author and categories for
 * this item, then inserts it as a new post or updates the existing one,
 * depending on freshness().
 *
 * The item may be dropped along the way (`$this->post` set to NULL):
 * when author_id() returns NULL, when category_ids() returns NULL
 * categories, or when a `syndicated_post` filter returns NULL.
 *
 * @return mixed 'new' if a post was inserted, 'updated' if an existing post was updated, false if nothing was stored.
 */
function store () {
	global $wpdb;

	if ($this->filtered()) {
		// This should never happen.
		FeedWordPress::critical_bug('SyndicatedPost', $this, __LINE__);
	}

	$freshness = $this->freshness();
	$needs_storing = ($freshness > 0);

	# -- Step 1: look up, or create, numeric ID for author
	if ($needs_storing) {
		$author_policy = FeedWordPress::on_unfamiliar('author', $this->post['named']['unfamiliar']['author']);
		$this->post['post_author'] = $this->author_id($author_policy);

		if (is_null($this->post['post_author'])) {
			$this->post = NULL;
		}
	}

	# -- Step 2: look up, or create, numeric IDs for categories
	if ($needs_storing && !$this->filtered()) {
		$category_policy = FeedWordPress::on_unfamiliar('category', $this->post['named']['unfamiliar']['category']);
		list($pcats, $ptags) = $this->category_ids(
			$this->post['named']['category'],
			$category_policy,
			/*tags_too=*/ true
		);

		$this->post['post_category'] = $pcats;
		$this->post['tags_input'] = array_merge($this->post['tags_input'], $ptags);

		if (is_null($this->post['post_category'])) {
			// filter mode on, no matching categories; drop the post
			$this->post = NULL;
		} else {
			// filter mode off or at least one match; now add on
			// the feed and global presets
			$preset_ids = $this->category_ids(
				$this->post['named']['preset/category'],
				'default'
			);
			$this->post['post_category'] = array_merge($this->post['post_category'], $preset_ids);

			if (count($this->post['post_category']) < 1) {
				// Default to category 1 ("Uncategorized" /
				// "General") if nothing else
				$this->post['post_category'][] = 1;
			}
		}
	}

	# -- Step 3: give filters a last look at the finished post
	if ($needs_storing && !$this->filtered()) {
		unset($this->post['named']);
		$this->post = apply_filters('syndicated_post', $this->post, $this);
	}

	# -- Step 4: write to the database
	if ($this->filtered()) {
		return false;
	}

	if ($freshness == 2) {
		// The item has not yet been added. So let's add it.
		$this->insert_new();
		$this->add_rss_meta();
		do_action('post_syndicated_item', $this->wp_id());

		return 'new';
	}

	if ($freshness == 1) {
		$this->post['ID'] = $this->wp_id();
		$this->update_existing();
		$this->add_rss_meta();
		do_action('update_syndicated_item', $this->wp_id());

		return 'updated';
	}

	return false;
} // function SyndicatedPost::store ()
365
/**
 * SyndicatedPost::insert_new() - inserts this item into the posts
 * table as a new post and records the new ID in `$this->_wp_id`.
 *
 * Uses wp_insert_post() when `$this->use_api('wp_insert_post')` says
 * so; otherwise writes the row directly with SQL.
 */
function insert_new () {
	global $wpdb, $wp_db_version;

	$dbpost = $this->normalize_post(/*new=*/ true);
	if (is_null($dbpost)) {
		return;
	}

	$caller = array(__CLASS__, __FUNCTION__);

	if ($this->use_api('wp_insert_post')) {
		// Tell WP 2.1 and 2.2 not to process for pingbacks
		$dbpost['post_pingback'] = false;

		// Kludge #1: necessitated by WordPress 2.6 munging
		// authorship meta-data on revisions.
		$revision_hook = array($this, 'fix_revision_meta');
		add_action('_wp_put_post_revision', $revision_hook);

		// Kludge #2: prevent kses filters from stripping the
		// content of posts when updating without a logged in
		// user who has `unfiltered_html` capability.
		$kses_hook = array($this, 'avoid_kses_munge');
		add_filter('content_save_pre', $kses_hook, 11);

		$this->_wp_id = wp_insert_post($dbpost);

		// Turn off kludges #1 and #2
		remove_action('_wp_put_post_revision', $revision_hook);
		remove_filter('content_save_pre', $kses_hook, 11);

		$this->validate_post_id($dbpost, $caller);

		// Unfortunately, as of WordPress 2.3, wp_insert_post()
		// *still* offers no way to use a guid of your choice,
		// and munges your post modified timestamp, too.
		$wpdb->query("
		UPDATE $wpdb->posts
		SET
			guid='{$dbpost['guid']}',
			post_modified='{$dbpost['post_modified']}',
			post_modified_gmt='{$dbpost['post_modified_gmt']}'
		WHERE ID='{$this->_wp_id}'
		");

		return;
	}

	# The right way to do this is the above. But, alas, in earlier
	# versions of WordPress, wp_insert_post has too much behavior
	# (mainly related to pings) that can't be overridden. In
	# WordPress 1.5, it's enough of a resource hog to make PHP
	# segfault after inserting 50-100 posts. This can get pretty
	# annoying, especially if you are trying to update your feeds
	# for the first time.

	// The excerpt column is only written when an excerpt was set.
	$excerpt_clause = "";
	if (isset($dbpost['post_excerpt'])) {
		$excerpt_clause = "post_excerpt = '{$dbpost['post_excerpt']}',";
	}

	$wpdb->query("
	INSERT INTO $wpdb->posts
	SET
		guid = '{$dbpost['guid']}',
		post_author = '{$dbpost['post_author']}',
		post_date = '{$dbpost['post_date']}',
		post_date_gmt = '{$dbpost['post_date_gmt']}',
		post_content = '{$dbpost['post_content']}',"
		.$excerpt_clause."
		post_title = '{$dbpost['post_title']}',
		post_name = '{$dbpost['post_name']}',
		post_modified = '{$dbpost['post_modified']}',
		post_modified_gmt = '{$dbpost['post_modified_gmt']}',
		comment_status = '{$dbpost['comment_status']}',
		ping_status = '{$dbpost['ping_status']}',
		post_status = '{$dbpost['post_status']}'
	");
	$this->_wp_id = $wpdb->insert_id;

	$this->validate_post_id($dbpost, $caller);

	// WordPress 1.5.x - 2.0.x
	wp_set_post_cats('1', $this->wp_id(), $this->post['post_category']);

	// Since we are not going through official channels, we need to
	// manually tell WordPress that we've published a new post.
	// We need to make sure to do this in order for FeedWordPress
	// to play well with the staticize-reloaded plugin (something
	// that a large aggregator website is going to *want* to be
	// able to use).
	do_action('publish_post', $this->_wp_id);
} /* SyndicatedPost::insert_new() */
445
/**
 * SyndicatedPost::update_existing() - writes the current state of this
 * item over the post already stored for it.
 *
 * Uses wp_insert_post() when `$this->use_api('wp_insert_post')` says
 * so; otherwise updates the row (matched by guid) directly with SQL and
 * sets the post's categories by hand.
 */
function update_existing () {
	global $wpdb;

	// wp_insert_post does not escape its input for us, so do it here.
	$dbpost = $this->normalize_post(/*new=*/ false);
	if (!is_null($dbpost)) :
		if ($this->use_api('wp_insert_post')) :
			$dbpost['post_pingback'] = false; // Tell WP 2.1 and 2.2 not to process for pingbacks

			// Kludge #1: necessitated by WordPress 2.6 munging
			// authorship meta-data on revisions.
			add_action('_wp_put_post_revision', array($this, 'fix_revision_meta'));

			// Kludge #2: prevent kses filters from stripping the
			// content of posts when updating without a logged in
			// user who has `unfiltered_html` capability.
			add_filter('content_save_pre', array($this, 'avoid_kses_munge'), 11);

			// Don't munge status fields that the user may have reset manually
			if (function_exists('get_post_field')) :
				$doNotMunge = array('post_status', 'comment_status', 'ping_status');
				foreach ($doNotMunge as $field) :
					$dbpost[$field] = get_post_field($field, $this->wp_id());
				endforeach;
			endif;

			$this->_wp_id = wp_insert_post($dbpost);

			// Turn off kludges #1 and #2
			remove_action('_wp_put_post_revision', array($this, 'fix_revision_meta'));
			remove_filter('content_save_pre', array($this, 'avoid_kses_munge'), 11);

			$this->validate_post_id($dbpost, array(__CLASS__, __FUNCTION__));

			// Unfortunately, as of WordPress 2.3, wp_insert_post()
			// munges your post modified timestamp.
			$result = $wpdb->query("
			UPDATE $wpdb->posts
			SET
				post_modified='{$dbpost['post_modified']}',
				post_modified_gmt='{$dbpost['post_modified_gmt']}'
			WHERE ID='{$this->_wp_id}'
			");
		else :

			$result = $wpdb->query("
			UPDATE $wpdb->posts
			SET
				post_author = '{$dbpost['post_author']}',
				post_content = '{$dbpost['post_content']}',"
				.(isset($dbpost['post_excerpt']) ? "post_excerpt = '{$dbpost['post_excerpt']}'," : "")."
				post_title = '{$dbpost['post_title']}',
				post_name = '{$dbpost['post_name']}',
				post_modified = '{$dbpost['post_modified']}',
				post_modified_gmt = '{$dbpost['post_modified_gmt']}'
			WHERE guid='{$dbpost['guid']}'
			");

			// WordPress 2.1.x and up
			if (function_exists('wp_set_post_categories')) :
				wp_set_post_categories($this->wp_id(), $this->post['post_category']);
			// WordPress 1.5.x - 2.0.x
			elseif (function_exists('wp_set_post_cats')) :
				wp_set_post_cats('1', $this->wp_id(), $this->post['post_category']);
			// This should never happen.
			else :
				// Note the trailing underscores: the magic constant
				// is __FUNCTION__, not __FUNCTION.
				FeedWordPress::critical_bug(__CLASS__.'::'.__FUNCTION__.'(): no post categorizing function', array("dbpost" => $dbpost, "this" => $this), __LINE__);
			endif;

			// Since we are not going through official channels, we need to
			// manually tell WordPress that we've edited a post.
			// We need to make sure to do this in order for FeedWordPress
			// to play well with the staticize-reloaded plugin (something
			// that a large aggregator website is going to *want* to be
			// able to use).
			do_action('edit_post', $this->post['ID']);
		endif;
	endif;
} /* SyndicatedPost::update_existing() */
524
525 /**
526 * SyndicatedPost::normalize_post()
527 *
528 * @param bool $new If true, this post is to be inserted anew. If false, it is an update of an existing post.
529 * @return array A normalized representation of the post ready to be inserted into the database or sent to the WordPress API functions
530 */
function normalize_post ($new = true) {
	global $wpdb;

	$out = array();

	// wp_insert_post does not escape its input for us, so every
	// top-level string field is escaped here. Non-string values
	// (arrays such as 'meta' or 'post_category') pass through as-is.
	foreach ($this->post as $key => $value) :
		if (is_string($value)) :
			$out[$key] = $wpdb->escape($value);
		else :
			$out[$key] = $value;
		endif;
	endforeach;

	// FIXME: Option for filtering out empty posts (i.e. those whose
	// title, content and excerpt are all empty).

	$title = (isset($out['post_title']) ? (string) $out['post_title'] : '');
	if (strlen($title) == 0) :
		// Multiply before casting to int so that fractional GMT
		// offsets (e.g. 5.5 hours) are not truncated.
		$offset = (int) round(((float) get_option('gmt_offset')) * 60 * 60);

		// The source title is only recorded when the feed's channel
		// has one, so it may legitimately be missing.
		$source = (
			isset($this->post['meta']['syndication_source'])
			? $this->post['meta']['syndication_source']
			: ''
		);

		// This title is generated from raw (unescaped) data, so it
		// has to be escaped just like the fields handled above.
		$out['post_title'] = $wpdb->escape(
			$source.' '.gmdate('Y-m-d H:i:s', $this->published() + $offset)
		);
		// FIXME: Option for what to fill a blank title with...
	endif;

	return $out;
}
558
559 /**
560 * SyndicatedPost::validate_post_id()
561 *
562 * @param array $dbpost An array representing the post we attempted to insert or update
563 * @param mixed $ns A string or array representing the namespace (class, method) whence this method was called.
564 */
function validate_post_id ($dbpost, $ns) {
	// Flatten the (class, method) pair into "Class::method".
	$ns = (is_array($ns) ? implode('::', $ns) : (string) $ns);

	$id_is_usable = (is_numeric($this->_wp_id) && !($this->_wp_id == 0));
	if ($id_is_usable) {
		return;
	}

	// This should never happen: report everything we know.
	$debug_vars = array(
		"\$this->_wp_id" => $this->_wp_id,
		"\$dbpost" => $dbpost,
		"\$this" => $this
	);
	FeedWordPress::critical_bug(
		/*name=*/ $ns.'::_wp_id',
		/*var =*/ $debug_vars,
		/*line # =*/ __LINE__
	);
} /* SyndicatedPost::validate_post_id() */
582
583 /**
584 * SyndicatedPost::fix_revision_meta() - Fixes the way WP 2.6+ fucks up
585 * meta-data (authorship, etc.) when storing revisions of an updated
586 * syndicated post.
587 *
588 * In their infinite wisdom, the WordPress coders have made it completely
589 * impossible for a plugin that uses wp_insert_post() to set certain
590 * meta-data (such as the author) when you store an old revision of an
591 * updated post. Instead, it uses the WordPress defaults (= currently
592 * active user ID if the process is running with a user logged in, or
593 * = #0 if there is no user logged in). This results in bogus authorship
594 * data for revisions that are syndicated from off the feed, unless we
595 * use a ridiculous kludge like this to end-run the munging of meta-data
596 * by _wp_put_post_revision.
597 *
598 * @param int $revision_id The revision ID to fix up meta-data
599 */
function fix_revision_meta ($revision_id) {
	global $wpdb;

	// Force both values to integers before they go anywhere near
	// the query, and make sure the query uses the sanitized copies
	// rather than the raw values.
	$post_author = (int) $this->post['post_author'];
	$revision_id = (int) $revision_id;

	$wpdb->query("
	UPDATE $wpdb->posts
	SET post_author={$post_author}
	WHERE post_type = 'revision' AND ID='$revision_id'
	");
} /* SyndicatedPost::fix_revision_meta () */
612
613 /**
614 * SyndicatedPost::avoid_kses_munge() -- If FeedWordPress is processing
615 * an automatic update, that generally means that wp_insert_post() is
616 * being called under the user credentials of whoever is viewing the
617 * blog at the time -- usually meaning no user at all. But if WordPress
618 * gets a wp_insert_post() when current_user_can('unfiltered_html') is
619 * false, it will run the content of the post through a kses function
620 * that strips out lots of HTML tags -- notably <object> and some others.
621 * This causes problems for syndicating (for example) feeds that contain
622 * YouTube videos. It also produces an unexpected asymmetry between
623 * automatically-initiated updates and updates initiated manually from
624 * the WordPress Dashboard (which are usually initiated under the
625 * credentials of a logged-in admin, and so don't get run through the
626 * kses function). So, to avoid the whole mess, what we do here is
627 * just forcibly disable the kses munging for a single syndicated post,
628 * by restoring the contents of the `post_content` field.
629 *
630 * @param string $content The content of the post, after other filters have gotten to it
631 * @return string The original content of the post, before other filters had a chance to munge it.
632 */
function avoid_kses_munge ($content) {
	global $wpdb;

	// Ignore whatever earlier filters did to $content and hand back
	// the escaped form of the content we were originally given.
	$original_content = $this->post['post_content'];
	return $wpdb->escape($original_content);
}
637
638 // SyndicatedPost::add_rss_meta: adds interesting meta-data to each entry
639 // using the space for custom keys. The set of keys and values to add is
640 // specified by the keys and values of $post['meta']. This is used to
641 // store anything that the WordPress user might want to access from a
642 // template concerning the post's original source that isn't provided
643 // for by standard WP meta-data (i.e., any interesting data about the
644 // syndicated post other than author, title, timestamp, categories, and
645 // guid). It's also used to hook into WordPress's support for
646 // enclosures.
function add_rss_meta () {
	global $wpdb;

	// Nothing to do unless we have a post with an array of meta-data.
	$has_meta = (
		is_array($this->post)
		&& isset($this->post['meta'])
		&& is_array($this->post['meta'])
	);
	if (!$has_meta) {
		return;
	}

	$postId = $this->wp_id();

	// Aggregated posts should NOT send out pingbacks.
	// WordPress 2.1-2.2 claim you can tell them not to
	// using $post_pingback, but they don't listen, so we
	// make sure here.
	$wpdb->query("
	DELETE FROM $wpdb->postmeta
	WHERE post_id='$postId' AND meta_key='_pingme'
	");

	foreach ($this->post['meta'] as $key => $values) {
		$key = $wpdb->escape($key);

		// If this is an update, clear out the old
		// values to avoid duplication.
		$wpdb->query("
		DELETE FROM $wpdb->postmeta
		WHERE post_id='$postId' AND meta_key='$key'
		");

		// Allow for either a single value or an array
		$value_list = (is_array($values) ? $values : array($values));
		foreach ($value_list as $raw_value) {
			$value = $wpdb->escape($raw_value);
			$wpdb->query("
			INSERT INTO $wpdb->postmeta
			SET
				post_id='$postId',
				meta_key='$key',
				meta_value='$value'
			");
		}
	}
} /* SyndicatedPost::add_rss_meta () */
687
688 // SyndicatedPost::author_id (): get the ID for an author name from
689 // the feed. Create the author if necessary.
function author_id ($unfamiliar_author = 'create') {
	global $wpdb;

	// $unfamiliar_author says what to do when no existing user
	// matches: 'create' a new user, use a given numeric user ID,
	// or fall back to the 'default' user (ID 1). With any other
	// value the result stays NULL.

	$a = $this->author();
	$author = $a['name'];
	$email = $a['email'];
	$url = $a['uri'];

	// Matching by e-mail can be switched off with an option, and
	// is skipped for addresses FeedWordPress considers null.
	$match_author_by_email = !('yes' == get_option("feedwordpress_do_not_match_author_by_email"));
	if ($match_author_by_email and !FeedWordPress::is_null_email($email)) :
		$test_email = $email;
	else :
		$test_email = NULL;
	endif;

	// Never can be too careful...
	$login = sanitize_user($author, /*strict=*/ true);
	$login = apply_filters('pre_user_login', $login);

	$nice_author = sanitize_title($author);
	$nice_author = apply_filters('pre_user_nicename', $nice_author);

	// $reg_author is the name quoted for use inside a regular
	// expression (the "a.k.a." alias patterns below).
	$reg_author = $wpdb->escape(preg_quote($author));
	$author = $wpdb->escape($author);
	$email = $wpdb->escape($email);
	$test_email = $wpdb->escape($test_email);
	$url = $wpdb->escape($url);

	// Check for an existing author rule....
	if (isset($this->link->settings['map authors']['name'][strtolower(trim($author))])) :
		$author_rule = $this->link->settings['map authors']['name'][strtolower(trim($author))];
	else :
		$author_rule = NULL;
	endif;

	// User name is mapped to a particular author. If that author ID exists, use it.
	if (is_numeric($author_rule) and get_userdata((int) $author_rule)) :
		$id = (int) $author_rule;

	// User name is filtered out
	elseif ('filter' == $author_rule) :
		$id = NULL;

	else :
		// Check the database for an existing author record that might fit

		#-- WordPress 2.0+
		if (fwp_test_wp_version(FWP_SCHEMA_HAS_USERMETA)) :

			// First try the user core data table.
			$id = $wpdb->get_var(
			"SELECT ID FROM $wpdb->users
			WHERE
				TRIM(LCASE(user_login)) = TRIM(LCASE('$login'))
				OR (
					LENGTH(TRIM(LCASE(user_email))) > 0
					AND TRIM(LCASE(user_email)) = TRIM(LCASE('$test_email'))
				)
				OR TRIM(LCASE(user_nicename)) = TRIM(LCASE('$nice_author'))
			");

			// If that fails, look for aliases in the user meta data table
			if (is_null($id)) :
				$id = $wpdb->get_var(
				"SELECT user_id FROM $wpdb->usermeta
				WHERE
					(meta_key = 'description' AND TRIM(LCASE(meta_value)) = TRIM(LCASE('$author')))
					OR (
						meta_key = 'description'
						AND TRIM(LCASE(meta_value))
						RLIKE CONCAT(
							'(^|\\n)a\\.?k\\.?a\\.?( |\\t)*:?( |\\t)*',
							TRIM(LCASE('$reg_author')),
							'( |\\t|\\r)*(\\n|\$)'
						)
					)
				");
			endif;

		#-- WordPress 1.5.x
		else :
			$id = $wpdb->get_var(
			"SELECT ID from $wpdb->users
			WHERE
				TRIM(LCASE(user_login)) = TRIM(LCASE('$login')) OR
				(
					LENGTH(TRIM(LCASE(user_email))) > 0
					AND TRIM(LCASE(user_email)) = TRIM(LCASE('$test_email'))
				) OR
				TRIM(LCASE(user_firstname)) = TRIM(LCASE('$author')) OR
				TRIM(LCASE(user_nickname)) = TRIM(LCASE('$author')) OR
				TRIM(LCASE(user_nicename)) = TRIM(LCASE('$nice_author')) OR
				TRIM(LCASE(user_description)) = TRIM(LCASE('$author')) OR
				(
					LOWER(user_description)
					RLIKE CONCAT(
						'(^|\\n)a\\.?k\\.?a\\.?( |\\t)*:?( |\\t)*',
						LCASE('$reg_author'),
						'( |\\t|\\r)*(\\n|\$)'
					)
				)
			");

		endif;

		// ... if you don't find one, then do what you need to do
		if (is_null($id)) :
			if ($unfamiliar_author === 'create') :
				$userdata = array();

				#-- user table data
				$userdata['ID'] = NULL; // new user
				$userdata['user_login'] = $login;
				$userdata['user_nicename'] = $nice_author;

				// Nobody is meant to log in with this password;
				// it only locks the account. Six hex characters
				// are too few to resist guessing, so use the
				// WordPress generator when it is available and
				// a full-length hash otherwise.
				if (function_exists('wp_generate_password')) :
					$userdata['user_pass'] = wp_generate_password();
				else :
					$userdata['user_pass'] = md5(uniqid(mt_rand(), true));
				endif;

				$userdata['user_email'] = $email;
				$userdata['user_url'] = $url;
				$userdata['display_name'] = $author;

				$id = wp_insert_user($userdata);
			elseif (is_numeric($unfamiliar_author) and get_userdata((int) $unfamiliar_author)) :
				$id = (int) $unfamiliar_author;
			elseif ($unfamiliar_author === 'default') :
				$id = 1;
			endif;
		endif;
	endif;

	// Remember the result on the link's settings so the same name
	// resolves through the author rule next time.
	if ($id) :
		$this->link->settings['map authors']['name'][strtolower(trim($author))] = $id;
	endif;
	return $id;
} // function SyndicatedPost::author_id ()
823
// Look up (and, where permitted, create) category IDs for a list of
// categories.
//
// Each element of $cats is either a category name or a reference of the form
// `{#N}`, which names the existing category whose numeric ID is N.
//
// $unfamiliar_category controls what happens to a name that matches neither
// an existing category nor an "a.k.a." line in a category description:
//   'create' - insert a new category and use its ID
//   'tag'    - (WordPress 2.3+ branch only) collect the name as a tag
//   'filter' - if no category at all was matched, return NULL instead of a
//              list, which signals that the post should be dropped
// Any other value silently skips the unmatched name.
//
// Returns the array of unique category IDs (or NULL, see 'filter'). When
// $tags_too is true, returns array($cat_ids, $tags) instead.
function category_ids ($cats, $unfamiliar_category = 'create', $tags_too = false) {
	global $wpdb;

	// We need to normalize whitespace because (1) trailing whitespace can
	// cause PHP and MySQL not to see eye to eye on VARCHAR comparisons for
	// some versions of MySQL (cf.
	// <http://dev.mysql.com/doc/mysql/en/char.html>), and (2) most people
	// do not want a semantic distinction between 'Computers' and
	// 'Computers '. The cast keeps a NULL or scalar argument from
	// breaking array_map().
	$cats = array_map('trim', (array) $cats);

	$tags = array();
	$cat_ids = array();

	foreach ($cats as $cat_name) :
		if (preg_match('/^{#([0-9]+)}$/', $cat_name, $backref)) :
			// Explicit reference by numeric ID: accept it only if
			// the category actually exists.
			$cat_id = (int) $backref[1];
			if (
				(function_exists('is_term') && is_term($cat_id, 'category'))
				|| get_category($cat_id)
			) :
				$cat_ids[] = $cat_id;
			endif;
		elseif (strlen($cat_name) > 0) :
			// Quoted for use inside a regular expression, then
			// escaped for use inside an SQL string literal.
			$resc = $wpdb->escape(preg_quote($cat_name));

			// WordPress 2.3+
			if (function_exists('is_term')) :
				$found = is_term($cat_name, 'category');
				if ($found) :
					$cat_ids[] = (int) $found['term_id'];
				// There must be a better way to do this...
				elseif ($results = $wpdb->get_results(
					"SELECT term_id
					FROM $wpdb->term_taxonomy
					WHERE
					LOWER(description) RLIKE
					CONCAT('(^|\\n)a\\.?k\\.?a\\.?( |\\t)*:?( |\\t)*', LOWER('{$resc}'), '( |\\t|\\r)*(\\n|\$)')"
				)) :
					foreach ($results as $term) :
						$cat_ids[] = (int) $term->term_id;
					endforeach;
				elseif ('tag' === $unfamiliar_category) :
					$tags[] = $cat_name;
				elseif ('create' === $unfamiliar_category) :
					$term = wp_insert_term($cat_name, 'category');
					if (is_wp_error($term)) :
						FeedWordPress::noncritical_bug('term insertion problem', array('cat_name' => $cat_name, 'term' => $term, 'this' => $this), __LINE__);
					else :
						$cat_ids[] = (int) $term['term_id'];
					endif;
				endif;

			// WordPress 1.5.x - 2.2.x
			else :
				$esc = $wpdb->escape($cat_name);
				$results = $wpdb->get_results(
					"SELECT cat_ID
					FROM $wpdb->categories
					WHERE
					(LOWER(cat_name) = LOWER('$esc'))
					OR (LOWER(category_description)
					RLIKE CONCAT('(^|\\n)a\\.?k\\.?a\\.?( |\\t)*:?( |\\t)*', LOWER('{$resc}'), '( |\\t|\\r)*(\\n|\$)'))
					");
				if ($results) :
					foreach ($results as $term) :
						$cat_ids[] = (int) $term->cat_ID;
					endforeach;
				elseif ('create' === $unfamiliar_category) :
					if (function_exists('wp_insert_category')) :
						$cat_id = wp_insert_category(array('cat_name' => $cat_name));
					// And into the database we go.
					else :
						$nice_kitty = sanitize_title($cat_name);
						$wpdb->query(sprintf("
							INSERT INTO $wpdb->categories
							SET
							cat_name='%s',
							category_nicename='%s'
							", $esc, $nice_kitty
						));
						$cat_id = $wpdb->insert_id;
					endif;

					// A failed insert yields an empty ID; do not
					// file the post under a bogus category 0.
					if ($cat_id) :
						$cat_ids[] = (int) $cat_id;
					endif;
				endif;
			endif;
		endif;
	endforeach;

	if ((count($cat_ids) == 0) and ($unfamiliar_category === 'filter')) :
		$cat_ids = NULL; // Drop the post
	else :
		$cat_ids = array_unique($cat_ids);
	endif;

	if ($tags_too) :
		$ret = array($cat_ids, $tags);
	else :
		$ret = $cat_ids;
	endif;

	return $ret;
} // function SyndicatedPost::category_ids ()
925
// Report whether the running WordPress is new enough for the named feature
// to be used.
//
// Recognised values of $tag:
//   'wp_insert_post'      - true when $wp_db_version > FWP_SCHEMA_21
//   'post_status_pending' - true when $wp_db_version > FWP_SCHEMA_23
// (The FWP_SCHEMA_* constants are defined outside this method; presumably
// they are the database schema revisions of WordPress 2.1 and 2.3 -- confirm
// against their definition.)
//
// Returns false for an unrecognised $tag, or when $wp_db_version is not set.
function use_api ($tag) {
	global $wp_db_version;

	// Default for unknown tags, so that we never return an undefined
	// variable.
	$ret = false;

	switch ($tag) :
	case 'wp_insert_post':
		// Before 2.2, wp_insert_post does too much of the wrong stuff to use it
		// In 1.5 it was such a resource hog it would make PHP segfault on big updates
		$ret = (isset($wp_db_version) and $wp_db_version > FWP_SCHEMA_21);
		break;
	case 'post_status_pending':
		$ret = (isset($wp_db_version) and $wp_db_version > FWP_SCHEMA_23);
		break;
	endswitch;

	return $ret;
} // function SyndicatedPost::use_api ()
940
941 #### EXTRACT DATA FROM FEED ITEM ####
942
// Find the item's creation date, if the feed supplies one.
//
// Checks <dc:created>, then <dcterms:created>, then the Atom 0.3 <created>
// element, and returns whatever parse_w3cdtf() makes of the first one that is
// present. Returns NULL when the item carries none of them.
function created () {
	$item = $this->item;

	if (isset($item['dc']['created'])) {
		return @parse_w3cdtf($item['dc']['created']);
	}

	if (isset($item['dcterms']['created'])) {
		return @parse_w3cdtf($item['dcterms']['created']);
	}

	if (isset($item['created'])) { // Atom 0.3
		return @parse_w3cdtf($item['created']);
	}

	return null;
}
// Determine when this item was published, as a Unix epoch timestamp.
//
// Feeds express the publication date in many different elements. The first
// one present wins, in this order: <dc:date>, <dcterms:issued>, Atom 1.0
// <published>, Atom 0.3 <issued>, RSS 2.0 <pubDate>.
//
// $fallback: when true and none of those elements is present, use the
//            last-updated time from SyndicatedPost::updated() instead.
// $default:  value to return when no date can be found at all. The sentinel
//            -1 means "use the current time".
function published ($fallback = true, $default = -1) {
	$epoch = null;

	if (isset($this->item['dc']['date'])) : // Dublin Core
		$epoch = @parse_w3cdtf($this->item['dc']['date']);
	elseif (isset($this->item['dcterms']['issued'])) : // Dublin Core extensions
		$epoch = @parse_w3cdtf($this->item['dcterms']['issued']);
	elseif (isset($this->item['published'])) : // Atom 1.0
		$epoch = @parse_w3cdtf($this->item['published']);
	elseif (isset($this->item['issued'])) : // Atom 0.3
		$epoch = @parse_w3cdtf($this->item['issued']);
	elseif (isset($this->item['pubdate'])) : // RSS 2.0
		$epoch = strtotime($this->item['pubdate']);
	elseif ($fallback) : // Fall back to <updated> / <modified> if present
		// Pass fallback=false so that updated() cannot bounce straight
		// back here, and hand on $default so the caller's choice of
		// default is honoured on this path too.
		$epoch = $this->updated(/*fallback=*/ false, /*default=*/ $default);
	endif;

	# If everything failed, then use the default (the current time,
	# unless the caller asked for something else).
	if (is_null($epoch)) :
		if (-1 == $default) :
			$epoch = time();
		else :
			$epoch = $default;
		endif;
	endif;

	return $epoch;
}
// Determine when this item was last modified, as a Unix epoch timestamp.
//
// Only Dublin Core and Atom offer an element for the time of last
// modification, so those are consulted first: <dc:modified>,
// <dcterms:modified>, Atom 0.3 <modified>, Atom 1.0 <updated>.
//
// $fallback: when true and none of those is present, ask
//            SyndicatedPost::published() for the publication date instead.
// $default:  value to return when nothing at all was found; the sentinel -1
//            stands for "the current time".
function updated ($fallback = true, $default = -1) {
	$item = $this->item;
	$stamp = null;

	if (isset($item['dc']['modified'])) { // Not really correct
		$stamp = @parse_w3cdtf($item['dc']['modified']);
	} elseif (isset($item['dcterms']['modified'])) { // Dublin Core extensions
		$stamp = @parse_w3cdtf($item['dcterms']['modified']);
	} elseif (isset($item['modified'])) { // Atom 0.3
		$stamp = @parse_w3cdtf($item['modified']);
	} elseif (isset($item['updated'])) { // Atom 1.0
		$stamp = @parse_w3cdtf($item['updated']);
	} elseif ($fallback) { // Fall back to issued / dc:date
		$stamp = $this->published(/*fallback=*/ false, /*default=*/ $default);
	}

	// Nothing usable was found: hand back the default.
	if (is_null($stamp)) {
		$stamp = (-1 == $default) ? time() : $default;
	}

	return $stamp;
}
1015
// Fingerprint of the feed item: the MD5 digest of the serialized item array.
function update_hash () {
	$serialized = serialize($this->item);
	return md5($serialized);
}
1019
// Return a unique identifier for this item.
//
// An identifier supplied by the feed is used as-is, trying in turn the Atom
// <id>, a namespaced <atom:id>, the RSS 2.0 <guid> and <dc:identifier>. When
// the feed offers none of these, an identifier is synthesized from the host
// name of the feed source plus either the item's creation date or a hash of
// its link and title.
function guid () {
	$item = $this->item;

	if (isset($item['id'])) { // Atom 0.3 / 1.0
		return $item['id'];
	}
	if (isset($item['atom']['id'])) { // Namespaced Atom
		return $item['atom']['id'];
	}
	if (isset($item['guid'])) { // RSS 2.0
		return $item['guid'];
	}
	if (isset($item['dc']['identifier'])) { // yeah, right
		return $item['dc']['identifier'];
	}

	// No identifier from the feed, so cobble together a tag: URI. Its
	// base is the host name of the feed source.
	$source = parse_url($this->feedmeta['link/uri']);

	$created = $this->created();
	if (!is_null($created)) {
		// A creation date is enough to tell items apart. (Then again,
		// a producer conscientious enough to supply creation dates
		// probably supplied unique identifiers as well.)
		$suffix = 'post.'.date('YmdHis', $created);
	} else {
		// Titles are often not unique, and sometimes links are not
		// either, but two different items rarely share *both*. So
		// hash the pair; this is about the best we can do.
		$suffix = md5($item['link'].'/'.$item['title']);
	}

	return 'tag:'.$source['host'].'://'.$suffix;
}
1059
// Collect what the feed says about the author of this item.
//
// Returns an array with the key 'name', the key 'uri', and -- when the feed
// supplies one -- the key 'email'.
//
// name:  first available of the item's author_name, <dc:creator>,
//        <dc:contributor>; then the channel's <dc:creator>,
//        <dc:contributor>, author_name; then (RSS feeds only) whatever can
//        be parsed out of the item's <author> element; finally the channel
//        title.
// email: the item's author_email, else the channel's author_email.
// uri:   the item's author_url, else the channel's author_url, else the
//        channel link.
function author () {
	$author = array ();

	if (isset($this->item['author_name'])):
		$author['name'] = $this->item['author_name'];
	elseif (isset($this->item['dc']['creator'])):
		$author['name'] = $this->item['dc']['creator'];
	elseif (isset($this->item['dc']['contributor'])):
		$author['name'] = $this->item['dc']['contributor'];
	elseif (isset($this->feed->channel['dc']['creator'])) :
		$author['name'] = $this->feed->channel['dc']['creator'];
	elseif (isset($this->feed->channel['dc']['contributor'])) :
		$author['name'] = $this->feed->channel['dc']['contributor'];
	elseif (isset($this->feed->channel['author_name'])) :
		$author['name'] = $this->feed->channel['author_name'];
	elseif ($this->feed->is_rss() and isset($this->item['author'])) :
		// The author element in RSS is allegedly an
		// e-mail address, but lots of people don't use
		// it that way. So let's make of it what we can.
		$author = parse_email_with_realname($this->item['author']);

		if (!isset($author['name'])) :
			if (isset($author['email'])) :
				$author['name'] = $author['email'];
			else :
				$author['name'] = $this->feed->channel['title'];
			endif;
		endif;
	else :
		$author['name'] = $this->feed->channel['title'];
	endif;

	if (isset($this->item['author_email'])):
		$author['email'] = $this->item['author_email'];
	elseif (isset($this->feed->channel['author_email'])) :
		$author['email'] = $this->feed->channel['author_email'];
	endif;

	if (isset($this->item['author_url'])):
		$author['uri'] = $this->item['author_url'];
	elseif (isset($this->feed->channel['author_url'])) :
		// Use the channel's value here: the item's author_url is
		// known to be unset in this branch.
		$author['uri'] = $this->feed->channel['author_url'];
	else:
		$author['uri'] = $this->feed->channel['link'];
	endif;

	return $author;
} // SyndicatedPost::author()
1108
// (element, attribute) pairs whose attribute value is a URI.
// SyndicatedPost::resolve_relative_uris() walks this list, in order, and
// resolves the value of each matching attribute.
var $uri_attrs = array (
	array('a', 'href'),
	array('applet', 'codebase'),
	array('area', 'href'),
	array('blockquote', 'cite'),
	array('body', 'background'),
	array('del', 'cite'),
	array('form', 'action'),
	array('frame', 'longdesc'), array('frame', 'src'),
	array('iframe', 'longdesc'), array('iframe', 'src'),
	array('head', 'profile'),
	array('img', 'longdesc'), array('img', 'src'), array('img', 'usemap'),
	array('input', 'src'), array('input', 'usemap'),
	array('ins', 'cite'),
	array('link', 'href'),
	array('object', 'classid'), array('object', 'codebase'),
	array('object', 'data'), array('object', 'usemap'),
	array('q', 'cite'),
	array('script', 'src')
); /* var SyndicatedPost::$uri_attrs */

// Base URI that resolve_single_relative_uri() resolves against; it is set by
// resolve_relative_uris() before any replacement is made.
var $_base = null;
1138
// preg_replace_callback() handler used by resolve_relative_uris(): takes the
// regex match for one attribute, resolves the attribute's value against
// $this->_base, and returns the matched text with the resolved URI in place
// of the original value.
function resolve_single_relative_uri ($refs) {
	$match = FeedWordPressHTML::attributeMatch($refs);
	$absolute = Relative_URI::resolve($match['value'], $this->_base);

	return $match['prefix'].$absolute.$match['suffix'];
} /* function SyndicatedPost::resolve_single_relative_uri() */
1144
// Resolve the URI-valued attributes listed in $obj->uri_attrs within
// $content, using the item's link as the base, and return the rewritten
// content.
//
// The MagpieRSS upgrade has some `xml:base` support baked in, but feeds with
// no good support for `xml:base` (a production RSS 2.0 feed, say) still turn
// up with relative URIs in them. This pass catches whatever is left over.
//
// $content is the HTML to process; $obj is the SyndicatedPost it belongs to.
function resolve_relative_uris ($content, $obj) {
	// Reset the base for resolving relative URIs
	$obj->_base = $obj->item['link'];

	$resolver = array(&$obj, 'resolve_single_relative_uri');

	foreach ($obj->uri_attrs as $pair) {
		$regex = FeedWordPressHTML::attributeRegex($pair[0], $pair[1]);
		$content = preg_replace_callback($regex, $resolver, $content);
	}

	return $content;
} /* function SyndicatedPost::resolve_relative_uris () */
1166
// (element pattern, attribute pattern) pairs naming the attributes that
// SyndicatedPost::sanitize_content() removes from syndicated content. Only
// `target` is stripped at present; the rules for `style` and for `on*` event
// handlers are kept below but disabled.
var $strip_attrs = array (
	array('[a-z]+', 'target'),
	// array('[a-z]+', 'style'),
	// array('[a-z]+', 'on[a-z]+'),
);
1172
// preg_replace_callback() handler used by sanitize_content(): returns the
// matched tag text with the attribute cut out, i.e. the part before the
// attribute joined directly to the part after it.
function strip_attribute_from_tag ($refs) {
	$match = FeedWordPressHTML::attributeMatch($refs);
	$head = $match['before_attribute'];
	$tail = $match['after_attribute'];

	return $head.$tail;
}
1177
// Remove every attribute listed in $obj->strip_attrs from the tags in
// $content and return the cleaned content.
//
// This kind of sucks; the intention is to replace it with lib_filter.
//
// $content is the HTML to process; $obj is the SyndicatedPost it belongs to.
function sanitize_content ($content, $obj) {
	$stripper = array(&$obj, 'strip_attribute_from_tag');

	foreach ($obj->strip_attrs as $pair) {
		$regex = FeedWordPressHTML::attributeRegex($pair[0], $pair[1]);
		$content = preg_replace_callback($regex, $stripper, $content);
	}

	return $content;
}
1193} // class SyndicatedPost
1194
Note: See TracBrowser for help on using the repository browser.