source: trunk/www.guidonia.net/wp/wp-content/plugins/parteibuch-aggregator/pba-rsssearch.php@ 44

Last change on this file since 44 was 44, checked in by luciano, 15 years ago
File size: 38.5 KB
RevLine 
[44]1<?php
2
//initialization of config variables,
//these allow
//1. optional cronjobs to call the pba-rsssearch.php directly
//2. have the search index stored in a separate database
//Do not edit the defaults below; define the matching constants in your wp-config.php instead.
$pba_search_directcall_password="putinsomethingfullinyourwpconfig"; //to override, define PBA_SEARCH_DIRECTCALL_PASSWORD in your wp-config
$pba_search_enable_directcall=false; //disabled by default with false, to override, define PBA_SEARCH_ENABLE_DIRECTCALL as true in your wp-config
$pba_search_database=""; //empty string "" for default database name, to override, define PBA_SEARCH_DATABASE in wp-config

//internal variable initialisation
$bdprsssearchdebug=false; //bdprsssearchdebug tells this script and the aggregator also that it is called in direct mode
$bdprsssearchdebug_with_searchindex_info=true;
$pba_search_directcall_comparepassword="";

//Neither the aggregator class nor $wpdb is available: the file was requested directly,
//so WordPress has to be bootstrapped from here.
$pba_search_direct_call_attempt=false;
if( !class_exists('BDPRSS2') && !isset($wpdb)){
	$pba_search_direct_call_attempt=true;
	//Resolve the path relative to this file, not to the current working directory,
	//because a cronjob may start the script from anywhere.
	require_once(dirname(__FILE__) . '/../../../wp-config.php');
}

if(defined('PBA_SEARCH_DATABASE')) $pba_search_database = PBA_SEARCH_DATABASE;
if(defined('PBA_SEARCH_ENABLE_DIRECTCALL')) $pba_search_enable_directcall = PBA_SEARCH_ENABLE_DIRECTCALL;
if(defined('PBA_SEARCH_DIRECTCALL_PASSWORD')) $pba_search_directcall_password = PBA_SEARCH_DIRECTCALL_PASSWORD;

if(isset($_GET['pbasearchpassword']))$pba_search_directcall_comparepassword=stripslashes($_GET['pbasearchpassword']);

if($pba_search_direct_call_attempt){
	//Strict string comparison: with a loose != two different numeric looking
	//passwords (e.g. "0e1" and "0e2") would be treated as equal.
	if(!$pba_search_enable_directcall || $pba_search_directcall_comparepassword !== (string)$pba_search_directcall_password){
		die ( __('Exiting pba_rsssearch before execution: either pba_search_enable_directcall not enabled or pba_search_directcall_password wrong.') );
	}else{
		$bdprsssearchdebug=true;
		echo "Debug ... loading wp-config.php ... ";
	}
}
37
38
39if( !class_exists('BDPRSS_SEARCH') ) {
40
41 class BDPRSS_SEARCH
42 {
43//config values
44var $heap_to_add;
45var $default_heapmode;
46var $bulklines;
47
48//internal script variables
49var $bdprss_searchtable_prefix;
50var $bdprss_searchtable_temp;
51var $bdprss_searchtable_status;
52var $bdprss_searchtable=array();
53
54var $bdprss_globalcounter;
55var $bdprss_bulksql;
56var $bdprss_item_status_sql;
57var $bdprss_item_delete_sql;
58var $get_ids4heap2add_mode_default;
59var $get_ids4heap2add_min_updatetimeage_default;
60var $get_ids4heap2add_max_item_updatetimeage_default;
61var $process_updates;
62var $process_deletes;
63
/**
 * Initialises the configuration defaults and the names of all search index tables.
 *
 * Reads the globals $pba_search_database (optional separate database for the index)
 * and $table_prefix (WordPress table prefix).
 */
function __construct()
{
	global $pba_search_database, $table_prefix;

	//config values
	$this->heap_to_add=1000; //maximum possible heap, depends on memory and exec time restricts in php
	$this->default_heapmode="bulk"; //possible values: "bulk", "temptable"
	$this->bulklines=20; //never set to zero, may result in division by zero error, max possible value depends on php memory
	$this->get_ids4heap2add_mode_default="notinstatus"; //possible values: "notinstatus", "maxitem_id"
	$this->get_ids4heap2add_min_updatetimeage_default="30";
	$this->get_ids4heap2add_max_item_updatetimeage_default="86400";
	$this->process_updates=false;
	$this->process_deletes=false;

	//The global keeps its trailing "." as before, but the dot is appended only once,
	//so creating a second instance no longer produces a "db.." prefix.
	if($pba_search_database != "" && substr($pba_search_database, -1) != ".") $pba_search_database .= ".";
	$this->bdprss_searchtable_prefix=$pba_search_database . $table_prefix . "pba_index_";

	//Each entry is array(table name, first character handled, first character NOT handled any more).
	$this->bdprss_searchtable[9]=array($this->bdprss_searchtable_prefix . "9","w","{");
	$this->bdprss_searchtable[8]=array($this->bdprss_searchtable_prefix . "8","u","w");
	$this->bdprss_searchtable[7]=array($this->bdprss_searchtable_prefix . "7","s","u");
	$this->bdprss_searchtable[6]=array($this->bdprss_searchtable_prefix . "6","n","s");
	$this->bdprss_searchtable[5]=array($this->bdprss_searchtable_prefix . "5","j","n");
	$this->bdprss_searchtable[4]=array($this->bdprss_searchtable_prefix . "4","h","j");
	$this->bdprss_searchtable[3]=array($this->bdprss_searchtable_prefix . "3","e","h");
	$this->bdprss_searchtable[2]=array($this->bdprss_searchtable_prefix . "2","d","e");
	$this->bdprss_searchtable[1]=array($this->bdprss_searchtable_prefix . "1","a","d");
	$this->bdprss_searchtable[0]=array($this->bdprss_searchtable_prefix . "0","0",":");

	$this->bdprss_searchtable_temp=$this->bdprss_searchtable_prefix . "temp";
	$this->bdprss_searchtable_status=$this->bdprss_searchtable_prefix . "status"; // 'OK', 'UPDATE', 'INTEMP'

	$this->bdprss_globalcounter=0;
	$this->bdprss_bulksql=array();
	$this->bdprss_item_status_sql="";
	$this->bdprss_item_delete_sql="";
} //init function __construct

/**
 * PHP 4 style constructor, kept so that explicit calls to BDPRSS_SEARCH() keep working.
 *
 * A method named like its class is no longer treated as a constructor since PHP 8,
 * therefore the real initialisation lives in __construct().
 */
function BDPRSS_SEARCH()
{
	$this->__construct();
}
102
/**
 * Splits a search phrase into plain words and quoted phrases and stems both.
 *
 * A quoted phrase starts with a blank followed by " or ' and ends with the SAME
 * quote character followed by a blank (the phrase is padded with blanks internally,
 * so quotes at the very beginning or end work too). An unterminated quote is
 * treated as plain text.
 *
 * @param string $searchword the raw search phrase
 * @return array index 0 holds the list of single words (may be empty),
 *               indexes 1..n hold one list of words per quoted phrase
 */
function bdprss_make_entities_from_searchphrase($searchword=""){
	global $bdprsssearchdebug;

	$chunks=array(array('type' => 'Plain', 'string' => ''));
	$current=0;
	$open_quote=""; //quote character that opened the current phrase, "" while outside of quotes
	$skip_next=false;

	//find chunks enclosed by quotes
	$letters=str_split(" " . $searchword . " ");
	$letter_count=count($letters);

	foreach($letters as $pos => $letter){
		if($skip_next){ $skip_next = false; continue; }
		//the last letter has no successor, do not read behind the end of the array
		$next_letter = ($pos + 1 < $letter_count) ? $letters[$pos + 1] : "";

		if($open_quote === "" && $letter === " " && ($next_letter === '"' || $next_letter === "'")){
			//blank + quote opens a phrase: drop both characters and start a new chunk if needed
			if(strlen($chunks[$current]['string'])>0){
				$current++;
				$chunks[$current]=array('type' => 'Plain', 'string' => '');
			}
			$open_quote=$next_letter;
			$skip_next=true;
			continue;
		}

		if($open_quote !== "" && $letter === $open_quote && $next_letter === " "){
			//matching quote + blank closes the phrase; an empty phrase is simply ignored
			if(strlen($chunks[$current]['string'])>0){
				$chunks[$current]['type']='Quoted';
				$current++;
				$chunks[$current]=array('type' => 'Plain', 'string' => '');
			}
			$open_quote="";
			continue;
		}

		$chunks[$current]['string'] .= $letter;
	}

	$sorted_array=array(0 => array());
	$plain_merger="";
	foreach($chunks as $chunk){
		if($chunk['type']=='Quoted'){
			$stemmed_phrase=$this->stem_search_text($chunk['string']);
			//a phrase without any indexable character (e.g. "!!!") must not become an empty search word
			if(strlen($stemmed_phrase)>0) $sorted_array[]=explode(' ', $stemmed_phrase);
		} else {
			$plain_merger .= " " . $chunk['string'];
		}
	}
	$plain_merger=$this->stem_search_text($plain_merger);
	if(strlen($plain_merger)>0) $sorted_array[0] = explode(' ', $plain_merger);
	if($bdprsssearchdebug) {echo "<br>Debug: sorted_array: "; print_r($sorted_array);}
	return $sorted_array; //in array[0] is single words, in array [1-n] is search phrases
}
148
/**
 * Looks up the index table responsible for a word.
 *
 * The decision is made on the first byte of the word: a table matches when that
 * byte is >= the table's start character and < the table's end character.
 *
 * @param string $searchword the (stemmed) word
 * @return string|false the table name, or false when no table covers the word
 */
function bdprss_findtableforword($searchword=""){
	$first_byte=ord(substr($searchword,0,1));
	foreach($this->bdprss_searchtable as $table_entry){
		list($table_name, $range_start, $range_end) = $table_entry;
		if($first_byte < ord($range_start) || $first_byte >= ord($range_end)) continue;
		return $table_name;
	}
	return false;
}
159
/**
 * Lists every table that belongs to the search index.
 *
 * @return array table name => true, for all word tables, the status table and the temp table
 */
function pbasearch_list_tables(){
	$table_names=array();
	foreach($this->bdprss_searchtable as $table_entry){
		$table_names[]=$table_entry[0];
	}
	$table_names[]=$this->bdprss_searchtable_status;
	$table_names[]=$this->bdprss_searchtable_temp;
	return array_fill_keys($table_names, true);
}
169
/**
 * Creates all search index tables (word tables, temp table, status table) if they do not exist yet.
 *
 * @return mixed false as soon as one of the CREATE statements failed,
 *               otherwise the result of the last $wpdb->query() call
 */
function bdprss_create_proc(){
	global $wpdb;

	$all_created=true;

	foreach($this->bdprss_searchtable as $table_to_search){
		//last character that still belongs to this table, only used for the table comment
		$threshold=chr(ord($table_to_search[2])-1);
		$sql="CREATE TABLE IF NOT EXISTS $table_to_search[0] (
			item_id int(10) NOT NULL,
			index_word varchar(255) NOT NULL,
			index_position int(10) NOT NULL,
			PRIMARY KEY (item_id,index_position),
			KEY idx_word_id (index_word,item_id)
			) ENGINE=MyISAM DEFAULT CHARSET=latin1 COMMENT='$table_to_search[1]-$threshold'";
		$result = $wpdb->query($sql);
		if($result === false) $all_created=false;
	}

	$sql="CREATE TABLE IF NOT EXISTS $this->bdprss_searchtable_temp (
		item_id int(10) NOT NULL,
		index_word varchar(255) NOT NULL,
		index_position int(10) NOT NULL,
		PRIMARY KEY (item_id,index_position)
		) ENGINE=MyISAM DEFAULT CHARSET=latin1 COMMENT='temp'";
	$result = $wpdb->query($sql);
	if($result === false) $all_created=false;

	$sql="CREATE TABLE IF NOT EXISTS $this->bdprss_searchtable_status (
		item_id int(10) NOT NULL,
		md5 char(32) NOT NULL,
		status char(6) NOT NULL,
		item_time int(15) NOT NULL,
		item_update_time int(15) NOT NULL,
		PRIMARY KEY (item_id),
		KEY idx_status_item_id (status,item_id)
		) ENGINE=MyISAM DEFAULT CHARSET=latin1 COMMENT='index status per item id'";
	$result = $wpdb->query($sql);
	if($result === false) $all_created=false;

	//Formerly only the result of the last statement was reported, hiding earlier failures.
	if(!$all_created) return false;
	return $result;
}
206
/**
 * Reduces a text to a blank separated list of lower case ASCII words ([a-z0-9]).
 *
 * Links and markup are removed by the BDPRSS2 helpers, HTML entities are decoded,
 * German umlauts are transliterated (ae, oe, ue, ss) and every other character
 * is turned into a word separator.
 *
 * @param string $bdprss_itemtext2search raw item text or search phrase, expected to be UTF-8
 * @return string the stemmed text, words separated by single blanks
 */
function stem_search_text($bdprss_itemtext2search){
	//strip text of html and script tags
	$text=BDPRSS2::remove_link_and_cache_links_from_item($bdprss_itemtext2search);
	$text=BDPRSS2::packageItemText($text, 0, 1000000, FALSE, '');

	//decapitalize first, this also turns entity names like &Auml; into &auml;
	$text=strtolower(trim($text));

	//work in single byte ISO-8859-1 for the umlaut replacement;
	//utf8_decode() is deprecated since PHP 8.2, so it is only the fallback
	if(function_exists('mb_convert_encoding')){
		$text=mb_convert_encoding($text, 'ISO-8859-1', 'UTF-8');
	} else {
		$text=utf8_decode($text);
	}

	//Charset and flags are given explicitly because the defaults differ between PHP versions:
	//decoding into UTF-8 would not match the ISO-8859-1 text, and undecoded quote entities
	//like &#039; would end up as bogus index words ("039").
	$text=html_entity_decode($text, ENT_QUOTES, 'ISO-8859-1');

	//Umlauts are written as ISO-8859-1 byte escapes so the replacement does not depend on the
	//encoding this source file is saved in; strtolower() does not reliably lower the upper case ones.
	$strg=array("\xE4","\xF6","\xFC","\xDF","\xC4","\xD6","\xDC");
	$rpl=array("ae","oe","ue","ss","ae","oe","ue");
	$text=str_replace($strg,$rpl,$text);

	//replace non text with blanks; everything outside ASCII is removed here,
	//so converting back to UTF-8 beforehand is not needed
	return trim(preg_replace('/[^a-z0-9]+/si',' ',$text));
}
220
/**
 * Queues an item for removal from the search index.
 *
 * Nothing is deleted here; the id is only appended to the pending delete list
 * that process_delete_sql() turns into DELETE statements.
 *
 * @param int $bdprss_delete_searchitem_id id of the item to remove
 */
function delete_item_from_search($bdprss_delete_searchitem_id){
	//the list is pasted into an SQL IN (...) clause later, so only a plain number may get in
	$item_id=abs(intval($bdprss_delete_searchitem_id));
	$this->bdprss_item_delete_sql .= ", '$item_id'";
}
225
/**
 * Stems one item and queues (mode "bulk") or writes (mode "temptable") its words for the index.
 *
 * In "bulk" mode nothing is written to the database here: the rows are collected in
 * $this->bdprss_bulksql and flushed by process_bulk_sql(). In "temptable" mode the words
 * are inserted into the temp table immediately.
 *
 * @param int    $bdprss_add_searchitem_id id of the item to index
 * @param string $addheapmode "bulk", "temptable" or "" for the configured default
 * @param bool   $dodelete    in bulk mode: queue the item's old index rows for deletion first
 * @return bool|null true when the item was processed, false for an unknown mode,
 *                   null when the item does not exist or has no text
 */
function add_item_to_search($bdprss_add_searchitem_id, $addheapmode="", $dodelete=false){

	global $bdprss_db, $wpdb, $bdprsssearchdebug;

	//the id is pasted into SQL below, so only a plain number may get in
	$bdprss_add_searchitem_id=abs(intval($bdprss_add_searchitem_id));

	if($addheapmode == "") $addheapmode = $this->default_heapmode;
	if( $this->process_updates ) $dodelete=true;

	$search_item=$bdprss_db->getItemByID($bdprss_add_searchitem_id);
	if(!is_object($search_item)) return;
	if(!$search_item->item_site_name && !$search_item->item_name && !$search_item->text_body) return;

	//Here we got all the info needed for an entry in search index
	$bdprss_itemtext2search=$search_item->item_site_name . " " . $search_item->item_name . " " . $search_item->text_body;
	$bdprss_itemtext2search=$this->stem_search_text($bdprss_itemtext2search);

	//add item to search index; the stemmed words consist of [a-z0-9] only and need no escaping
	if($addheapmode == "temptable"){
		$bdprss_tmptable_rows=array();
		foreach(explode(' ',$bdprss_itemtext2search) as $word_position => $word){
			$bdprss_tmptable_rows[]="($bdprss_add_searchitem_id, '$word', $word_position)";
		}
		$bdprss_tmptable_sql= "INSERT INTO " . $this->bdprss_searchtable_temp . " (item_id, index_word , index_position) VALUES " . implode(", ", $bdprss_tmptable_rows);
		$result = $wpdb->query($bdprss_tmptable_sql);
		$this->bdprss_item_status_sql .= ", '$bdprss_add_searchitem_id'";
	} elseif($addheapmode == "bulk"){
		foreach(explode(' ',$bdprss_itemtext2search) as $word_position => $word){
			$table4word=$this->bdprss_findtableforword($word);
			//words that no table is responsible for (e.g. the empty word) are not indexed
			if($table4word) {
				if(!isset($this->bdprss_bulksql[$table4word])) $this->bdprss_bulksql[$table4word] ="";
				$this->bdprss_bulksql[$table4word] .= ", ($bdprss_add_searchitem_id, '$word', $word_position)";
			}
		}
		$this->bdprss_item_status_sql .= ", '$bdprss_add_searchitem_id'";
		if($dodelete) $this->bdprss_item_delete_sql .= ", '$bdprss_add_searchitem_id'";
	} else {
		if($bdprsssearchdebug) echo "Debug: Error: no valid addheapmode: $addheapmode ";
		return false;
	}

	//the stemmed text is pure ASCII, so counting blanks gives the number of words
	$insertcounter=substr_count($bdprss_itemtext2search, ' ')+1;
	$this->bdprss_globalcounter+=$insertcounter;
	if($bdprsssearchdebug) echo " " . $insertcounter . "w. ";
	flush();
	return true;
}
280
/**
 * Searches the index and returns the ids of the matching items, newest first by default.
 *
 * Every word of the search phrase gets its own alias (i1, i2, ...) of the responsible
 * index table; the words of a quoted phrase are additionally tied together by their
 * positions. The total number of hits (ignoring $start/$max) is stored in the global
 * $found_tickeritems.
 *
 * @param string      $search_phrase   search words, quoted phrases, optionally a leading "feed:xyz" switch
 * @param int         $start           offset of the first row to return
 * @param int         $max             maximum number of rows to return
 * @param array|false $ids             restrict to these site ids (ignored when $list_id is given)
 * @param int         $list_id         restrict to the sites of this list
 * @param string      $itemdate        restrict to one day, format YYYY-MM-DD
 * @param string      $feed            restrict to the site with exactly this feed url
 * @param int         $fromtimestamp   restrict to items not older than this unix timestamp
 * @param int         $totimestamp     restrict to items not newer than this unix timestamp
 * @param bool        $opsfilter       return only the latest item of each site
 * @param bool        $orderbysitename order by site name instead of item time
 * @param int|false   $itemid          restrict to this single item (checks whether it passes all filters)
 * @return mixed whatever $wpdb->get_results() returns for the query
 */
function bdprss_search4items($search_phrase, $start=0, $max=10, $ids=false, $list_id=0, $itemdate="", $feed="", $fromtimestamp=0, $totimestamp=0, $opsfilter=false, $orderbysitename=false, $itemid=false){
	global $wpdb, $bdprss_db, $found_tickeritems, $bdprsssearchdebug;

	//make variables secure
	$list_id=abs(intval($list_id));
	$fromtimestamp=abs(intval($fromtimestamp));
	$totimestamp=abs(intval($totimestamp));
	$start=abs(intval($start));
	$max=abs(intval($max));
	if($itemid) $itemid = abs(intval($itemid));
	//The date is pasted into SQL, so the WHOLE string has to look like YYYY-MM-DD.
	//The former ereg() check was unanchored (and ereg() no longer exists in PHP 7).
	if(!is_string($itemdate) || !preg_match('/^[0-9]{4}-[01][0-9]-[0-3][0-9]\z/', $itemdate)){
		$itemdate = "";
	}

	//just a test, switch model to be programmed later in a more fitting place:
	//a search phrase starting with "feed:xyz" restricts the search to feeds whose url contains xyz
	$infeed=preg_replace("/(feed:[^ ]+).*/",'${1}',$search_phrase);
	if(strstr(substr($infeed, 0, 5), 'feed:')) {
		$search_phrase=preg_replace("/(feed:[^ ]+)/",'',$search_phrase);
		$infeed = str_replace("feed:", "", $infeed);
		//mysql_real_escape_string() was removed in PHP 7, use the WordPress escaping instead
		$infeed = function_exists('esc_sql') ? esc_sql($infeed) : $wpdb->escape($infeed);
		if($bdprsssearchdebug) echo "<br>Debug: feed from switch is: " . $infeed . " Remaining searchphrase is: " . $search_phrase;
	} else {
		$infeed="";
	}
	if($feed && $feed != ""){
		$feed = function_exists('esc_sql') ? esc_sql($feed) : $wpdb->escape($feed);
	} else {
		$feed = "";
	}

	//following is just for testing purposes, get site ids from list
	if(false && $bdprsssearchdebug && $list_id > 0){
		$listInfo = $bdprss_db->get_list($list_id);
		$lurls = $listInfo->{$bdprss_db->lurls};
		$ids = preg_split("','", $lurls, -1, PREG_SPLIT_NO_EMPTY);
		$list_id=0;
	}

	//stem search phrase
	$sorted_array=$this->bdprss_make_entities_from_searchphrase($search_phrase);

	$argument_counter=1;
	$straight="";
	$no_sqlcache=$bdprsssearchdebug ? " SQL_NO_CACHE " : "";

	$found_rows = 'SQL_CALC_FOUND_ROWS ';
	$search_query="SELECT $straight $no_sqlcache $found_rows distinct r1.identifier as $bdprss_db->miid FROM ";

	$search_query_tables="";
	$search_query_conditions_items="";
	$search_query_conditions_words="";
	$search_query_conditions_position="";
	$search_query_conditions_status="";
	$search_query_conditions_list="";
	$search_query_conditions_ids="";
	$search_query_conditions_itemdate="";
	$search_query_conditions_feed="";
	$search_query_conditions_infeed="";
	$search_query_conditions_ops="";
	$search_query_conditions_obsn="";
	$search_query_conditions_itemid="";

	foreach($sorted_array as $sorted_array_key => $sorted_array_value){
		$previous_alias=""; //alias of the previous word of the same quoted phrase
		foreach($sorted_array_value as $sorted_array_value_value){
			$word_table=$this->bdprss_findtableforword($sorted_array_value_value);
			//a word without index table (e.g. an empty word) would produce broken SQL, skip it
			if($word_table === false) continue;
			$alias="i" . $argument_counter;
			$search_query_tables.=", " . $word_table . " " . $alias . "\n";
			$search_query_conditions_items .= "AND r1.identifier = " . $alias . ".item_id \n";
			$search_query_conditions_words .= "AND " . $alias . ".index_word = '" . $sorted_array_value_value . "' \n";
			//index 0 holds the single words, all other indexes hold phrases whose words must be adjacent
			if($sorted_array_key > 0 && $previous_alias != "") {
				$search_query_conditions_position .= "AND " . $alias . ".index_position = " . $previous_alias . ".index_position + 1 \n";
			}
			$previous_alias=$alias;
			$argument_counter++;
		}
	}
	//the status table only has to be consulted when at least one word is searched in the index
	$checkstatus = ($argument_counter > 1);

	if($argument_counter > 1) $search_query=str_replace('r1.identifier','i1.item_id', $search_query);
	$search_query_tables=" " . $bdprss_db->mitemtable . " r1 " . $search_query_tables ;
	if($checkstatus) {
		$search_query_tables = " " . $this->bdprss_searchtable_status . " sts, " . $search_query_tables;
		$search_query_conditions_status="AND r1.identifier = sts.item_id AND ( sts.status = 'OK' OR sts.status = 'UPDATE' ) \n";
	}

	//one page per site filter
	if($opsfilter){
		//give only out one article per site
		//tough query, but still far from perfect
		//1st problem - cannot specify, give out two, three or n articles from each site
		//2nd problem - search filter will search only in latest article per site,
		//but better would be to search first and then filter doublettes
		$search_query_tables = " ( SELECT max( ops4.identifier ) as identifier
			FROM " . $bdprss_db->mitemtable . " ops4, (
			SELECT ops3.site_id , max( ops3.item_time ) maxtime FROM " . $bdprss_db->mitemtable . " ops3 GROUP BY ops3.site_id
			)ops2 WHERE ops4.site_id = ops2.site_id AND ops4.item_time = ops2.maxtime
			GROUP BY ops4.site_id ) ops, " . $search_query_tables;
		$search_query_conditions_ops="AND r1.identifier = ops.identifier \n";
	}

	if($feed != ""){
		if($argument_counter > 1) {
			$search_query_conditions_feed="AND r1.site_id = (select mst.identifier AS site_id from " . $bdprss_db->msitetable . " mst WHERE mst.feed_url = '" . $feed . "') \n";
		} else {
			$search_query_tables = " (select mst.identifier AS site_id from " . $bdprss_db->msitetable . " mst WHERE mst.feed_url = '" . $feed . "') s2, " . $search_query_tables;
			$search_query_conditions_feed="AND r1.site_id = s2.site_id \n";
		}
	} elseif($infeed != ""){
		if($argument_counter > 1) {
			$search_query_conditions_feed="AND r1.site_id IN (select mst.identifier AS site_id from " . $bdprss_db->msitetable . " mst WHERE mst.feed_url LIKE '%" . $infeed . "%') \n";
		} else {
			$search_query_tables = " (select mst.identifier AS site_id from " . $bdprss_db->msitetable . " mst WHERE mst.feed_url LIKE '%" . $infeed . "%') s2, " . $search_query_tables;
			$search_query_conditions_feed="AND r1.site_id = s2.site_id \n";
		}
	}

	if($list_id > 0) {
		if($argument_counter > 1) {
			//both list options are logically synonym, but mysql optimizer treats both query styles very different
			$search_query_conditions_list="AND r1.site_id in (select sites.identifier AS site_id from (" . $bdprss_db->listtable . " lists join " . $bdprss_db->sitetable . " sites) where ((concat(_latin1',',lists.url_list,_latin1',') like concat(_utf8'%,',sites.identifier,_utf8',%')) or (lists.list_all = _latin1'Y')) and lists.identifier = '" . $list_id . "') \n";
		} else {
			$search_query_tables = " (select sites.identifier AS site_id from (" . $bdprss_db->listtable . " lists join " . $bdprss_db->msitetable . " sites) where ((concat(_latin1',',lists.url_list,_latin1',') like concat(_utf8'%,',sites.identifier,_utf8',%')) or (lists.list_all = _latin1'Y')) and lists.identifier = '" . $list_id . "') s1, " . $search_query_tables;
			$search_query_conditions_list="AND r1.site_id = s1.site_id \n";
		}
	} elseif(is_array($ids) && $ids) {
		if($argument_counter > 1) {
			$virgin = true;
			foreach($ids as $id) {
				if(!$id) continue;
				if($virgin)
					$search_query_conditions_ids .= "AND ( ";
				else
					$search_query_conditions_ids .= "OR";
				$search_query_conditions_ids .= " $bdprss_db->misiteid='" . abs(intval($id)) . "' ";
				$virgin = false;
			}
			if(!$virgin) $search_query_conditions_ids .= ") ";
		}else{
			$replace_ids="'-1'";
			foreach($ids as $id) {
				$replace_ids .= ", '".abs(intval($id))."'";
			}
			$replace_ids=str_replace("'-1',",'', $replace_ids);
			$search_query_tables = " (select sites.identifier AS site_id from " . $bdprss_db->msitetable . " sites WHERE identifier IN (".$replace_ids.")) s1, " . $search_query_tables;
			$search_query_conditions_ids="AND r1.site_id = s1.site_id \n";
		}
	}
	if($itemdate!="") $search_query_conditions_itemdate .="AND r1.item_time >= UNIX_TIMESTAMP( '" . $itemdate . "' ) AND r1.item_time < UNIX_TIMESTAMP( '" . $itemdate . "' ) + 24 *60 *60 \n";
	if($fromtimestamp > 0) $search_query_conditions_itemdate .="AND r1.item_time >= '" . $fromtimestamp . "' \n";
	if($totimestamp > 0) $search_query_conditions_itemdate .="AND r1.item_time <= '" . $totimestamp . "' \n";

	if($orderbysitename){
		//this order seems to be only sensible together with opsfilter
		$search_query_tables = " " . $bdprss_db->msitetable . " obsn, " . $search_query_tables;
		$search_query_conditions_obsn="AND r1.site_id = obsn.identifier \n";
		$search_query_order=" ORDER BY obsn.site_name ASC ";
	}else{
		$search_query_order=" ORDER BY r1.item_time DESC ";
	}

	//looks strange, but gives possibility to check, if item_id is hit with all the other filter applied
	if($itemid) $search_query_conditions_itemid="AND r1.identifier = '".$itemid."' \n";

	$search_query_conditions="WHERE 1 " . $search_query_conditions_words . $search_query_conditions_items . $search_query_conditions_position . $search_query_conditions_ids . $search_query_conditions_status . $search_query_conditions_list . $search_query_conditions_itemdate . $search_query_conditions_feed . $search_query_conditions_ops . $search_query_conditions_obsn . $search_query_conditions_itemid;
	$search_query_conditions=str_replace('WHERE 1 AND','WHERE', $search_query_conditions);

	$search_query_limits=" LIMIT $start , $max ";

	$search_query= $search_query . $search_query_tables . $search_query_conditions . $search_query_order . $search_query_limits;
	if($bdprsssearchdebug) echo "<br>Debug: search_query: " . $search_query;

	$tmp_result = $wpdb->get_results($search_query);

	//SQL_CALC_FOUND_ROWS above makes the total number of hits available without the LIMIT
	$found_tickeritems_query = apply_filters( 'found_tickeritems_query', 'SELECT FOUND_ROWS()' );
	$found_tickeritems = $wpdb->get_var( $found_tickeritems_query );

	if($bdprsssearchdebug) echo "<br>Debug: Rows found: " . $found_tickeritems ;

	//give out item_ids
	return $tmp_result;
}
470
/**
 * Executes the pending deletes collected in $this->bdprss_item_delete_sql.
 *
 * The queued items are removed from every word table and, when the object is in
 * "process deletes" mode, from the status table as well. The queue is emptied afterwards.
 */
function process_delete_sql(){
	global $wpdb, $bdprss_db, $bdprsssearchdebug;

	if(strlen($this->bdprss_item_delete_sql) == 0) return;

	//the queue has the form ", 'id1', 'id2'", cut off the leading separator
	$queued_ids=substr($this->bdprss_item_delete_sql,2);

	foreach($this->bdprss_searchtable as $table_entry){
		$table_name=$table_entry[0];
		if($bdprsssearchdebug) echo "<br>Debug " . date("H:i:s") . ": Deleting from " . $table_name;
		flush();
		$wpdb->query("DELETE FROM $table_name WHERE item_id IN ( " . $queued_ids . " ) \n");
	}

	if($this->process_deletes){
		$status_delete = "DELETE FROM " . $this->bdprss_searchtable_status . " WHERE item_id IN ( " . $queued_ids . " ) \n";
		if($bdprsssearchdebug) echo "<br>Debug " . date("H:i:s") . ": Deleting from statustable ... " . $status_delete;
		flush();
		$wpdb->query($status_delete);
	}

	$this->bdprss_item_delete_sql="";
}
489
/**
 * Flushes the rows queued by add_item_to_search() in bulk mode into the index tables.
 *
 * Pending deletes are executed first, then one multi row INSERT is sent per index table.
 * Afterwards the queued items are written to the status table with status 'OK'.
 * Both queues are emptied at the end, whether or not the status was written.
 */
function process_bulk_sql(){
	global $wpdb, $bdprss_db, $bdprsssearchdebug;
	if(strlen($this->bdprss_item_delete_sql)>0){
		$this->process_delete_sql();
	}
	if($bdprsssearchdebug) echo "<br>Debug " . date("H:i:s") . ": Inserting ... ";
	flush();

	//stays false when nothing is queued, so the check below never reads an undefined variable
	$result=false;
	foreach($this->bdprss_bulksql as $bdprss_bulksql_table => $bdprss_bulksql_value){
		$bdprss_bulksql_value = "INSERT INTO " . $bdprss_bulksql_table . " (item_id, index_word , index_position) VALUES " . substr($bdprss_bulksql_value,2) . "\n";
		$result = $wpdb->query($bdprss_bulksql_value);
	}
	if($bdprsssearchdebug) echo " Debug " . date("H:i:s") . ": inserts done.";
	flush();

	//NOTE(review): only the result of the LAST insert decides whether the status is written;
	//a failed insert into an earlier table goes unnoticed - confirm whether that is intended.
	if($result && strlen($this->bdprss_item_status_sql)>0){
		$this->bdprss_item_status_sql = "REPLACE INTO " . $this->bdprss_searchtable_status . "
			Select identifier as item_id, md5(concat(item_site_name, ' ', item_name, ' ', text_body)) as md5, 'OK' as status, item_time, item_update_time from " . $bdprss_db->itemtable . "
			WHERE identifier IN ( " . substr($this->bdprss_item_status_sql,2) . ")";
		$result = $wpdb->query($this->bdprss_item_status_sql);
	}
	$this->bdprss_bulksql=array();
	$this->bdprss_item_status_sql ="";
}
512
/**
 * Marks an indexed item for re-indexing when its content has changed.
 *
 * The status is switched from 'OK' to 'UPDATE' only if the stored md5 differs both from
 * $md5tocompare and from the md5 of the item's current content in the item table.
 *
 * @param int    $item_id      id of the item
 * @param string $md5tocompare md5 (hex) of the content the caller considers current
 * @return int|false number of status rows changed as reported by $wpdb, false if the id is not usable
 */
function markitem4update($item_id="", $md5tocompare=""){
	global $wpdb, $bdprss_db, $bdprsssearchdebug;

	$item_id=abs(intval($item_id));
	//the md5 is pasted into SQL, let nothing but hex digits pass
	$md5tocompare=preg_replace('/[^a-f0-9]/i', '', (string)$md5tocompare);

	//abs(intval()) never yields a negative number, so this is true for every input
	if($item_id > 0 || $item_id == '0'){
		$sql="UPDATE " . $this->bdprss_searchtable_status . " sts, " . $bdprss_db->itemtable . " i
			SET sts.status = 'UPDATE'
			WHERE sts.item_id = '".$item_id."'
			AND sts.status = 'OK'
			AND sts.md5 != '" . $md5tocompare . "'
			AND sts.item_id = i.identifier
			AND sts.md5 != md5( concat( i.item_site_name, ' ', i.item_name, ' ', i.text_body ) )
			";
		$result = $wpdb->query($sql);
		return $wpdb->rows_affected;
	}

	return false;
}
532
/**
 * Sets the index status of one item to 'DELETE'.
 *
 * @param int $item_id id of the item
 * @return bool true once the UPDATE has been sent (its outcome is not checked)
 */
function markitem4delete($item_id=""){
	global $wpdb, $bdprss_db, $bdprsssearchdebug;

	$status_item_id=abs(intval($item_id));

	//abs(intval()) cannot produce a negative id, so this guard is a formality
	if(!($status_item_id >= 0)) return false;

	$wpdb->query("UPDATE " . $this->bdprss_searchtable_status . "
		SET status = 'DELETE'
		WHERE item_id = '".$status_item_id."'");
	return true;
}
547
/**
 * Sets the index status of the items of one feed to 'DELETE'.
 *
 * @param int $feed_id    id of the site/feed
 * @param int $oldDefined when greater than 1: unix timestamp, only items older than it
 *                        (or without item time) are marked; otherwise all items of the feed
 * @return bool true once the UPDATE has been sent (its outcome is not checked)
 */
function markfeed4delete($feed_id="", $oldDefined=false){
	global $wpdb, $bdprss_db, $bdprsssearchdebug;

	$site_id=abs(intval($feed_id));
	$older_than=abs(intval($oldDefined));

	//abs(intval()) cannot produce a negative id, so this guard is a formality
	if(!($site_id >= 0)) return false;

	$update_sql="UPDATE " . $this->bdprss_searchtable_status . " sts, " . $bdprss_db->mitemtable . " mi
		SET sts.status = 'DELETE'
		WHERE sts.item_id = mi.identifier
		AND mi.site_id = '" . $site_id . "'";

	//with a cut-off time only the old items of the feed are affected
	if($older_than > 1){
		$update_sql .= "
		AND (mi.item_time < '" . $older_than . "' OR mi.item_time = '')";
	}

	$wpdb->query($update_sql);
	return true;
}
575
/**
 * Selects the ids of the items the indexer should work on next.
 *
 * Modes:
 *  - "maxitem_id":     items with an id greater than the highest id found in any index table
 *  - "notinstatus":    items without a row in the status table
 *  - "processupdates": items whose status is 'UPDATE' (switches the object into update mode)
 *  - "processdeletes": items whose status is 'DELETE' (switches the object into delete mode)
 * Any other mode selects nothing.
 *
 * @param int    $heap_to_add                               maximum number of ids to return
 * @param string $get_ids4heap2add_mode                     one of the modes above, "" for the configured default
 * @param mixed  $get_ids4heap2add_min_updatetimeage        minimum age in seconds of the item's last update, "" for the default
 * @param mixed  $get_ids4heap2add_max_item_updatetimeage   maximum age in seconds of the item's last update
 *                                                          ("notinstatus" only, 0 disables it), "" for the default
 * @param int    $list_id                                   restrict to the sites of this list ("maxitem_id" and "notinstatus" only)
 * @return array rows as returned by $wpdb->get_results(), each with the property "identifier"; empty when nothing was found
 */
function get_ids4heap2add($heap_to_add=1, $get_ids4heap2add_mode="",
	$get_ids4heap2add_min_updatetimeage="", $get_ids4heap2add_max_item_updatetimeage="", $list_id=0){
	global $wpdb, $bdprss_db, $bdprsssearchdebug;

	$list_id=abs(intval($list_id));
	//goes into the LIMIT clause, so only a plain number may get in
	$heap_to_add=abs(intval($heap_to_add));
	if($get_ids4heap2add_mode=="") $get_ids4heap2add_mode=$this->get_ids4heap2add_mode_default;

	if($get_ids4heap2add_min_updatetimeage==="") $get_ids4heap2add_min_updatetimeage=$this->get_ids4heap2add_min_updatetimeage_default;
	$get_ids4heap2add_min_updatetimeage=abs(intval($get_ids4heap2add_min_updatetimeage));
	if($get_ids4heap2add_max_item_updatetimeage==="") $get_ids4heap2add_max_item_updatetimeage=$this->get_ids4heap2add_max_item_updatetimeage_default;
	$get_ids4heap2add_max_item_updatetimeage=abs(intval($get_ids4heap2add_max_item_updatetimeage));

	$list_condition="";
	$maxagecondition="";
	$sql=""; //stays empty for an unknown mode

	//sub query part shared by both list restrictions: all sites that belong to the list
	$list_sites_from=" from (" . $bdprss_db->listtable . " lists join " . $bdprss_db->sitetable . " sites) where ((concat(_latin1',',lists.url_list,_latin1',') like concat(_utf8'%,',sites.identifier,_utf8',%')) or (lists.list_all = _latin1'Y')) and lists.identifier = '" . $list_id . "') \n";

	if($get_ids4heap2add_mode=="maxitem_id"){
		if($list_id > 0) $list_condition=" AND site_id in (select sites.identifier AS site_id" . $list_sites_from;
		$sql = "SELECT IFNULL(max(a.mval),0) from( \n";
		foreach($this->bdprss_searchtable as $table_to_search){
			$sql .= "SELECT IFNULL(max(item_id),0) mval FROM $table_to_search[0] union \n";
		}
		$sql .= "SELECT IFNULL(max(item_id),0) mval FROM $this->bdprss_searchtable_temp
			) as a";
		$sql = "SELECT identifier FROM " . $bdprss_db->mitemtable . "
			WHERE identifier > (" . $sql . ") " . $list_condition . " order by identifier limit 0, $heap_to_add";
	} elseif($get_ids4heap2add_mode=="notinstatus"){
		//range scan not a problem when range is small
		//item_update_time > UNIX_TIMESTAMP() - 86400 ran 0.1 seconds in test with newly created index
		if($list_id > 0) $list_condition=" AND i.item_feed_url IN (select sites.feed_url AS item_feed_url" . $list_sites_from;
		if($get_ids4heap2add_max_item_updatetimeage > 0) $maxagecondition=" AND i.item_update_time > UNIX_TIMESTAMP( ) - " . $get_ids4heap2add_max_item_updatetimeage . " \n";
		$sql="SELECT i.identifier as identifier
			FROM " . $bdprss_db->itemtable . " i
			LEFT JOIN " . $this->bdprss_searchtable_status . " sts
			ON i.identifier = sts.item_id
			WHERE sts.item_id IS NULL
			$maxagecondition
			AND i.item_update_time < UNIX_TIMESTAMP( ) - " . $get_ids4heap2add_min_updatetimeage . "
			$list_condition
			ORDER BY identifier
			LIMIT 0, $heap_to_add";
	}elseif($get_ids4heap2add_mode=="processupdates"){
		$this->process_updates=true;
		$sql="SELECT i.identifier as identifier FROM " . $bdprss_db->itemtable . " i, " . $this->bdprss_searchtable_status . " sts
			WHERE i.identifier = sts.item_id
			AND sts.status = 'UPDATE'
			AND i.item_update_time < UNIX_TIMESTAMP() - " . $get_ids4heap2add_min_updatetimeage . "
			ORDER BY identifier LIMIT 0, $heap_to_add";
	}elseif($get_ids4heap2add_mode=="processdeletes"){
		$this->process_deletes=true;
		$sql="SELECT sts.item_id as identifier FROM " . $this->bdprss_searchtable_status . " sts
			WHERE sts.status = 'DELETE'
			ORDER BY identifier LIMIT 0, $heap_to_add";
	}

	if($bdprsssearchdebug) echo "Debug: sql in get_ids4heap2add is: $sql";

	//always hand back an array: count() on null is a fatal error since PHP 8
	$item_ids2add=array();
	if(strlen($sql)>0){
		$found_rows=$wpdb->get_results($sql);
		if(is_array($found_rows)) $item_ids2add=$found_rows;
	}
	$records_found=count($item_ids2add);
	if($bdprsssearchdebug) echo " Records found: " . $records_found;
	return $item_ids2add;
}
640
/**
 * Process one batch ("heap") of items against the search index.
 *
 * Every row of the id list is either removed from the index (when
 * $bdprss_getidmode is "processdeletes") or passed to add_item_to_search().
 * The SQL collected by those calls is then flushed according to the heap mode:
 * "bulk" flushes every $this->bulklines rows and once more at the end,
 * "temptable" records the item status and (optionally) copies the temp table
 * into the partitioned search tables.
 *
 * @param int         $heap_to_add          Batch size, forwarded to get_ids4heap2add() when no id list is supplied.
 * @param string      $addheapmode          "bulk", "temptable", or "" to fall back to $this->default_heapmode.
 * @param array|false $item_ids2add         Rows exposing an ->identifier property; false fetches them via get_ids4heap2add().
 * @param bool        $insertfromtemptable  In "temptable" mode, copy the temp table into the search tables when true.
 * @param string      $bdprss_getidmode     Forwarded to get_ids4heap2add(); "processdeletes" deletes items instead of adding them.
 * @param int         $list_id              Forwarded to get_ids4heap2add().
 * @param mixed       $get_ids4heap2add_max_item_updatetimeage Forwarded to get_ids4heap2add().
 * @param mixed       $get_ids4heap2add_min_updatetimeage      Forwarded to get_ids4heap2add().
 * @return false|null false when the run is skipped ($bdprss_db reports high server load,
 *                    $bdprss_db->memtablesok is not 1, or bulk mode with zero bulklines);
 *                    otherwise no explicit value is returned.
 */
function add_heap2search_index($heap_to_add=1, $addheapmode="", $item_ids2add=false,
    $insertfromtemptable=true, $bdprss_getidmode="", $list_id=0, $get_ids4heap2add_max_item_updatetimeage="", $get_ids4heap2add_min_updatetimeage="")
{
    global $wpdb, $bdprss_db, $bdprsssearchdebug;

    // Back off (sleeping proportionally to the load notice level) instead of indexing.
    if ($bdprss_db->highserverload || $bdprss_db->memtablesok != 1) {
        if ($bdprsssearchdebug) {
            echo "<br>Debug: Load threshold is at: " . $bdprss_db->serverstatus['pbaload']['notice'] . " ... exiting!";
        }
        flush();
        sleep(abs(intval($bdprss_db->serverstatus['pbaload']['notice'])) * 10);
        return false;
    }

    if ($addheapmode == "") {
        $addheapmode = $this->default_heapmode;
    }
    if ($addheapmode == "bulk" && $this->bulklines == 0) {
        return false;
    }
    if ($item_ids2add === false) {
        $item_ids2add = $this->get_ids4heap2add(
            $heap_to_add,
            $bdprss_getidmode,
            $get_ids4heap2add_min_updatetimeage,
            $get_ids4heap2add_max_item_updatetimeage,
            $list_id
        );
    }

    // Walk the id list; anything that is not an array is treated as "nothing to do".
    $processed = 0;
    $candidates = is_array($item_ids2add) ? $item_ids2add : array();
    foreach ($candidates as $row) {
        $processed++;
        if ($bdprsssearchdebug) {
            echo " Debug: #$processed: ID $row->identifier -";
        }
        flush();

        if ($bdprss_getidmode == "processdeletes") {
            $this->delete_item_from_search($row->identifier);
        } else {
            $this->add_item_to_search($row->identifier, $addheapmode);
        }

        if ($addheapmode != "bulk") {
            continue;
        }

        // Flush the collected SQL each time a full multiple of bulklines rows is reached.
        $batches = $processed / $this->bulklines;
        if ($batches != intval($batches)) {
            continue;
        }
        if ($bdprsssearchdebug) {
            echo "Debug: Rows processed: " . $processed;
        }
        if (count($this->bdprss_bulksql) > 0) {
            $this->process_bulk_sql();
        }
        if (strlen($this->bdprss_item_delete_sql) > 0) {
            $this->process_delete_sql();
        }
    }

    // Temp-table mode: mark all collected items as INTEMP in the status table.
    if ($addheapmode == "temptable" && strlen($this->bdprss_item_status_sql) > 0) {
        $this->bdprss_item_status_sql = "REPLACE into " . $this->bdprss_searchtable_status . "
 Select identifier as item_id, md5(concat(item_site_name, ' ', item_name, ' ', text_body)) as md5, 'INTEMP' as status, item_time, item_update_time FROM " . $bdprss_db->itemtable . "
 WHERE identifier IN ( " . substr($this->bdprss_item_status_sql,2) . ")";
        if ($bdprsssearchdebug) {
            echo " Debug bdprss_item_status_sql: " . $this->bdprss_item_status_sql;
        }
        flush();
        $wpdb->query($this->bdprss_item_status_sql);
        $this->bdprss_item_status_sql = "";
    }

    if (!$this->process_deletes && $insertfromtemptable && $addheapmode == "temptable") {
        if ($bdprsssearchdebug) {
            echo "<br>Debug: Copying temp_table entries to indexed tables ...";
        }
        flush();

        // Each entry is (table name, first-character lower bound, first-character upper bound).
        foreach ($this->bdprss_searchtable as $partition) {
            if ($bdprsssearchdebug) {
                echo "<br>Debug: Working on table $partition[0]";
            }
            flush();

            if ($this->process_updates) {
                // Updated items: drop their old index rows before re-inserting.
                if ($bdprsssearchdebug) {
                    echo " Debug " . date("H:i:s") . ": Deleting - ";
                }
                $status_table = $this->bdprss_searchtable_status;
                $deletesql = "DELETE FROM {$partition[0]} USING {$partition[0]} INNER JOIN {$status_table}
 WHERE {$partition[0]}.item_id = {$status_table}.item_id
 AND {$status_table}.status = 'INTEMP'";
                if ($bdprsssearchdebug) {
                    echo " Debug sql: " . $deletesql;
                }
                flush();
                $wpdb->query($deletesql);
            }

            if ($bdprsssearchdebug) {
                echo " Debug " . date("H:i:s") . ": Inserting - ";
            }
            flush();
            $copysql = "Insert into {$partition[0]} SELECT i.item_id AS item_id, i.index_word AS index_word, i.index_position
 FROM {$this->bdprss_searchtable_temp} i
 WHERE ASCII( SUBSTRING( index_word , 1, 1 ) ) >= ASCII('{$partition[1]}')
 AND ASCII( SUBSTRING( index_word , 1, 1 ) ) < ASCII('{$partition[2]}')";
            $wpdb->query($copysql);
            if ($bdprsssearchdebug) {
                echo " Debug " . date("H:i:s") . ": Done " . $partition[0];
            }
            flush();
        }

        // Temp table fully copied: empty it and promote INTEMP items to OK.
        $wpdb->query(" TRUNCATE TABLE {$this->bdprss_searchtable_temp}");
        $wpdb->query("UPDATE " . $this->bdprss_searchtable_status . " SET status = 'OK' WHERE status = 'INTEMP'");
    } elseif ($addheapmode == "bulk") {
        // Flush whatever is left over from the last incomplete bulk.
        if (count($this->bdprss_bulksql) > 0) {
            $this->process_bulk_sql();
        }
    }

    if (strlen($this->bdprss_item_delete_sql) > 0) {
        $this->process_delete_sql();
    }
}
722
723//only for debug purpose so far
/**
 * Count the rows of the search status table per status value.
 *
 * In debug mode the result is additionally dumped to the output.
 *
 * @return mixed Whatever $wpdb->get_results() yields for the grouped
 *               "status, count" query on $this->bdprss_searchtable_status.
 */
function get_search_index_info()
{
    global $wpdb, $bdprss_db, $bdprsssearchdebug;

    $status_counts = $wpdb->get_results(
        "SELECT status , count( status ) count
 FROM " . $this->bdprss_searchtable_status . "
 GROUP BY status"
    );

    if ($bdprsssearchdebug) {
        echo "\n<br><br>Debug: Status of search index: ";
        print_r($status_counts);
    }

    return $status_counts;
}
737
738
739 } //class BDPRSS_SEARCH
740} //if class BDPRSS_SEARCH not exists
741
// Create the shared global BDPRSS_SEARCH instance once; an already existing one is reused.
if (!isset($bdprss_search)) {
    $bdprss_search = new BDPRSS_SEARCH();
}
744
if ($bdprsssearchdebug) {
    // Direct-call debug driver: exactly one action is chosen from the GET switches,
    // in this priority order: mifd, mifu, createproc, searchphrase, addheap/heapmode.
    //
    // Adding a single item by id (the former "additemid" switch) is deliberately not
    // offered: per the original author's note, calling add_item_to_search() on its own
    // runs neither the temp-table nor the bulk flush and does not delete old entries first.

    // Read every switch defensively so a missing parameter never triggers an
    // undefined-index/undefined-variable warning and never leaks a stale global.
    $bdprss_addheap = isset($_GET['addheap']) ? abs(intval($_GET['addheap'])) : 0;
    $bdprss_create_proc = isset($_GET['createproc']) ? $_GET['createproc'] : 0;
    $bdprss_searchphrase = (isset($_GET['searchphrase']) && is_string($_GET['searchphrase']))
        ? stripslashes($_GET['searchphrase'])
        : "";
    $bdprss_heapmode = (isset($_GET['heapmode']) && is_string($_GET['heapmode']))
        ? stripslashes($_GET['heapmode'])
        : "";
    if ($bdprss_heapmode == "temp") {
        $bdprss_heapmode = "temptable"; // just a short alias
    }

    // Only the known id selection modes are accepted; anything else means "default".
    $bdprss_getidmode = (isset($_GET['getidmode']) && is_string($_GET['getidmode']))
        ? stripslashes($_GET['getidmode'])
        : "";
    if (in_array($bdprss_getidmode, array("notinstatus", "maxitem_id", "processupdates", "processdeletes"), true)) {
        echo "Debug getidmode is $bdprss_getidmode ... ";
    } else {
        $bdprss_getidmode = "";
    }

    // null means "switch not given"; 0 is a valid item id for both switches.
    $bdprss_mifd = isset($_GET['mifd']) ? abs(intval($_GET['mifd'])) : null;
    $bdprss_mifu = isset($_GET['mifu']) ? abs(intval($_GET['mifu'])) : null;

    $bdprss_debug_list_id = isset($_GET['listid']) ? abs(intval($_GET['listid'])) : 0;
    $get_ids4heap2add_max_item_updatetimeage = isset($_GET['maxage']) ? abs(intval($_GET['maxage'])) : "";
    $get_ids4heap2add_min_updatetimeage = isset($_GET['minage']) ? abs(intval($_GET['minage'])) : "";

    if ($bdprss_mifd !== null) {
        echo "Marking item #" . $bdprss_mifd . " for delete ... ";
        $bdprss_debug_result = $bdprss_search->markitem4delete($bdprss_mifd);
        echo "<br>Debugresult given out:<br>";
        print_r($bdprss_debug_result);
    } elseif ($bdprss_mifu !== null) {
        echo "Marking item #" . $bdprss_mifu . " for update ... ";
        $bdprss_debug_result = $bdprss_search->markitem4update($bdprss_mifu);
        echo "<br>Debugresult given out:<br>";
        print_r($bdprss_debug_result);
    } elseif ($bdprss_create_proc > 0) {
        echo "Recreating procedure ... ";
        $bdprss_debug_result = $bdprss_search->bdprss_create_proc();
        echo "<br>Debugresult given out:<br>";
        print_r($bdprss_debug_result);
    } elseif ($bdprss_searchphrase != "") {
        // Underscores act as word separators: "_" -> "+" -> " " after urldecode().
        $bdprss_searchphrase = urldecode(str_replace("_", "+", $bdprss_searchphrase));
        $bdprss_searchphrase_utf8 = utf8_encode($bdprss_searchphrase);
        // The phrase is user input: escape it before reflecting it into the HTML output.
        echo "Searchphrase " . htmlspecialchars($bdprss_searchphrase_utf8, ENT_QUOTES) . " ... ";

        $bdprss_search_result = $bdprss_search->bdprss_search4items($bdprss_searchphrase_utf8, 0, 20, false, $bdprss_debug_list_id);
        echo "<br>Searchresult given out:<br>";
        print_r($bdprss_search_result);
    } elseif ($bdprss_addheap > 0 || $bdprss_heapmode == "temptable") {
        // Never process more entries per call than the search object allows.
        if ($bdprss_addheap > $bdprss_search->heap_to_add) {
            $bdprss_addheap = $bdprss_search->heap_to_add;
        }
        echo " processing $bdprss_addheap entries ...";

        $heapmode = $bdprss_search->default_heapmode;
        $insertfromtemptable = true;

        if ($bdprss_heapmode == "temptable") {
            if ($bdprss_getidmode != "processdeletes") {
                echo " using temp table $bdprss_search->bdprss_searchtable_temp ...";
            }
            $heapmode = "temptable";
        } elseif ($bdprss_heapmode == "justinsert2temptable") {
            echo " inserting 2 temp table $bdprss_search->bdprss_searchtable_temp without copying temp table to productive tables...";
            $heapmode = "temptable";
            $insertfromtemptable = false;
        }
        if ($heapmode == "bulk") {
            if ($bdprss_search->bulklines > 0) {
                echo " using bulks of $bdprss_search->bulklines lines ...";
            }
        }
        $bdprss_search->add_heap2search_index($bdprss_addheap, $heapmode, false, $insertfromtemptable, $bdprss_getidmode, $bdprss_debug_list_id, $get_ids4heap2add_max_item_updatetimeage, $get_ids4heap2add_min_updatetimeage);
    } else {
        echo " nothing to do, use GET switches to do something in debug mode";
    }

    if ($bdprsssearchdebug_with_searchindex_info) {
        $bdprss_search->get_search_index_info();
    }
    echo "<br><Br>Debug: $bdprss_search->bdprss_globalcounter Words. " . $wpdb->num_queries . " queries. " . number_format(timer_stop(),3) . " seconds. </small>";

}// if bdprsssearchdebug
832
833
834
835?>
Note: See TracBrowser for help on using the repository browser.