source: trunk/www.guidonia.net/wp/wp-content/plugins/wordpress-google-seo-positioner/includes/class-snoopy.php@ 44

Last change on this file since 44 was 44, checked in by luciano, 14 years ago
File size: 36.8 KB
Line 
1<?php
2
3/*************************************************
4
5Snoopy - the PHP net client
6Author: Monte Ohrt <monte@ispi.net>
7Copyright (c): 1999-2000 ispi, all rights reserved
8Version: 1.01
9
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
24You may contact the author of Snoopy by e-mail at:
25monte@ispi.net
26
27Or, write to:
28Monte Ohrt
29CTO, ispi
30237 S. 70th suite 220
31Lincoln, NE 68510
32
33The latest version of Snoopy can be obtained from:
34http://snoopy.sourceforge.net/
35
36*************************************************/
37
38if ( !in_array('Snoopy', get_declared_classes() ) ) :
39class Snoopy
40{
41 /**** Public variables ****/
42
43 /* user definable vars */
44
45 var $host = "www.php.net"; // host name we are connecting to
46 var $port = 80; // port we are connecting to
47 var $proxy_host = ""; // proxy host to use
48 var $proxy_port = ""; // proxy port to use
49 var $proxy_user = ""; // proxy user to use
50 var $proxy_pass = ""; // proxy password to use
51
52 var $agent = "Snoopy v1.2.3"; // agent we masquerade as
53 var $referer = ""; // referer info to pass
54 var $cookies = array(); // array of cookies to pass
55 // $cookies["username"]="joe";
56 var $rawheaders = array(); // array of raw headers to send
57 // $rawheaders["Content-type"]="text/html";
58
59 var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
60 var $lastredirectaddr = ""; // contains address of last redirected address
61 var $offsiteok = true; // allows redirection off-site
62 var $maxframes = 0; // frame content depth maximum. 0 = disallow
63 var $expandlinks = true; // expand links to fully qualified URLs.
64 // this only applies to fetchlinks()
65 // submitlinks(), and submittext()
66 var $passcookies = true; // pass set cookies back through redirects
67 // NOTE: this currently does not respect
68 // dates, domains or paths.
69
70 var $user = ""; // user for http authentication
71 var $pass = ""; // password for http authentication
72
73 // http accept types
74 var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
75
76 var $results = ""; // where the content is put
77
78 var $error = ""; // error messages sent here
79 var $response_code = ""; // response code returned from server
80 var $headers = array(); // headers returned from server sent here
81 var $maxlength = 8192; // max return data length (body)
82 var $read_timeout = 0; // timeout on read operations, in seconds
83 // supported only since PHP 4 Beta 4
84 // set to 0 to disallow timeouts
85 var $timed_out = false; // if a read operation timed out
86 var $status = 0; // http request status
87
88 var $temp_dir = "/tmp"; // temporary directory that the webserver
89 // has permission to write to.
90 // under Windows, this should be C:\temp
91
92 var $curl_path = "/usr/local/bin/curl";
93 // Snoopy will use cURL for fetching
94 // SSL content if a full system path to
95 // the cURL binary is supplied here.
96 // set to false if you do not have
97 // cURL installed. See http://curl.haxx.se
98 // for details on installing cURL.
99 // Snoopy does *not* use the cURL
100 // library functions built into php,
101 // as these functions are not stable
102 // as of this Snoopy release.
103
104 /**** Private variables ****/
105
106 var $_maxlinelen = 4096; // max line length (headers)
107
108 var $_httpmethod = "GET"; // default http request method
109 var $_httpversion = "HTTP/1.0"; // default http request version
110 var $_submit_method = "POST"; // default submit method
111 var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
112 var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
113 var $_redirectaddr = false; // will be set if page fetched is a redirect
114 var $_redirectdepth = 0; // increments on an http redirect
115 var $_frameurls = array(); // frame src urls
116 var $_framedepth = 0; // increments on frame depth
117
118 var $_isproxy = false; // set if using a proxy server
119 var $_fp_timeout = 30; // timeout for socket connection
120
121/*======================================================================*\
122 Function: fetch
123 Purpose: fetch the contents of a web page
124 (and possibly other protocols in the
125 future like ftp, nntp, gopher, etc.)
126 Input: $URI the location of the page to fetch
127 Output: $this->results the output text from the fetch
128\*======================================================================*/
129
function fetch($URI)
{
    /*
     * Fetches $URI over http (through the connection obtained from
     * _connect()) or https (through _httpsrequest(), which shells out to the
     * cURL binary), then follows redirects up to $this->maxredirs and frame
     * sources up to $this->maxframes.
     *
     * Returns false for an unsupported scheme (message in $this->error), a
     * failed _connect(), or a missing / non-executable cURL binary; true
     * otherwise. The request helpers leave the content in $this->results.
     *
     * NOTE(review): the return values of _httprequest()/_httpsrequest() are
     * deliberately not propagated, matching the historical behaviour.
     */

    //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $protocol = strtolower($scheme);

    if ($protocol !== "http" && $protocol !== "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
        return false;
    }

    if ($protocol === "https")
    {
        // https is delegated to an external cURL binary; bail out early
        // when it is not configured or cannot be executed
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    // a proxy needs the entire URI on the request line,
    // an origin server only the path and query
    if ($this->_isproxy)
        $target = $URI;
    else
        $target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

    if ($protocol === "http")
    {
        if (!$this->_connect($fp))
            return false;
        $this->_httprequest($target, $fp, $URI, $this->_httpmethod);
        $this->_disconnect($fp);
    }
    else
    {
        $this->_httpsrequest($target, $URI, $this->_httpmethod);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // only follow redirect if it's on this site, or offsiteok is true.
        // Both schemes count as "this site", and the host must end at a
        // URL delimiter so that "host.evil.com" is not mistaken for "host".
        $onsite = preg_match("|^https?://".preg_quote($this->host)."(?![^:/?#])|i", $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
268
269/*======================================================================*\
270 Function: submit
271 Purpose: submit an http form
272 Input: $URI the location to post the data
273 $formvars the formvars to use.
274 format: $formvars["var"] = "val";
275 $formfiles an array of files to submit
276 format: $formfiles["var"] = "/dir/filename.ext";
277 Output: $this->results the text output from the post
278\*======================================================================*/
279
function submit($URI, $formvars="", $formfiles="")
{
    /*
     * Submits a form to $URI using $this->_submit_method and
     * $this->_submit_type, with the body built by _prepare_post_body().
     * Redirects are followed up to $this->maxredirs: a redirect target
     * containing "?" is fetched with fetch(), any other target is submitted
     * again. Frame sources are fetched up to $this->maxframes.
     *
     * Returns false for an unsupported scheme (message in $this->error), a
     * failed _connect(), or a missing / non-executable cURL binary; true
     * otherwise. The request helpers leave the content in $this->results.
     */
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
    $protocol = strtolower($scheme);

    if ($protocol !== "http" && $protocol !== "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
        return false;
    }

    if ($protocol === "https")
    {
        // https is delegated to an external cURL binary; bail out early
        // when it is not configured or cannot be executed
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    $this->host = $host;
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    // a proxy needs the entire URI on the request line,
    // an origin server only the path and query
    if ($this->_isproxy)
        $target = $URI;
    else
        $target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

    if ($protocol === "http")
    {
        if (!$this->_connect($fp))
            return false;
        $this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
        $this->_disconnect($fp);
    }
    else
    {
        $this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // qualify a redirect target that does not start with our scheme
        if (!preg_match("|^".$scheme."://|", $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$host);

        // only follow redirect if it's on this site, or offsiteok is true.
        // Both schemes count as "this site", and the host must end at a
        // URL delimiter so that "host.evil.com" is not mistaken for "host".
        $onsite = preg_match("|^https?://".preg_quote($this->host)."(?![^:/?#])|i", $this->_redirectaddr);
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy: the nested fetch() calls refill $this->_frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
434
435/*======================================================================*\
436 Function: fetchlinks
437 Purpose: fetch the links from a web page
438 Input: $URI where you are fetching from
439 Output: $this->results an array of the URLs
440\*======================================================================*/
441
function fetchlinks($URI)
{
    /*
     * Fetches $URI and replaces $this->results with the links found in it
     * (one array of links per document when several framed documents were
     * fetched). When $this->expandlinks is set, the links are qualified
     * against the last redirect address, or $URI when there was none.
     *
     * Returns the result of fetch(): true on success, false otherwise.
     */
    if (!$this->fetch($URI))
        return false;

    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        // Expand each document's link list on its own: handing the whole
        // array of arrays to _expandlinks() would make preg_replace()
        // stringify the nested arrays.
        for ($x = 0; $x < count($this->results); $x++)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
463
464/*======================================================================*\
465 Function: fetchform
466 Purpose: fetch the form elements from a web page
467 Input: $URI where you are fetching from
468 Output: $this->results the resulting html form
469\*======================================================================*/
470
function fetchform($URI)
{
    // Reduces the fetched page(s) to their form markup via _stripform().
    // Returns false when fetch() fails, true otherwise.
    if (!$this->fetch($URI))
        return false;

    if (!is_array($this->results))
    {
        // a single document was fetched
        $this->results = $this->_stripform($this->results);
        return true;
    }

    // several documents were collected: strip each one in place
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_stripform($this->results[$idx]);
        $idx++;
    }

    return true;
}
490
491
492/*======================================================================*\
493 Function: fetchtext
494 Purpose: fetch the text from a web page, stripping the links
495 Input: $URI where you are fetching from
496 Output: $this->results the text from the web page
497\*======================================================================*/
498
function fetchtext($URI)
{
    // Reduces the fetched page(s) to plain text via _striptext().
    // Returns false when fetch() fails, true otherwise.
    if (!$this->fetch($URI))
        return false;

    if (!is_array($this->results))
    {
        // a single document was fetched
        $this->results = $this->_striptext($this->results);
        return true;
    }

    // several documents were collected: strip each one in place
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        $idx++;
    }

    return true;
}
515
516/*======================================================================*\
517 Function: submitlinks
518 Purpose: grab links from a form submission
519 Input: $URI where you are submitting from
520 Output: $this->results an array of the links from the post
521\*======================================================================*/
522
function submitlinks($URI, $formvars="", $formfiles="")
{
    // Submits the form and replaces $this->results with the links found in
    // the response(s). Returns false when submit() fails, true otherwise.
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // links are qualified against the address we finally ended up at
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (!is_array($this->results))
    {
        // a single document came back
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
        return true;
    }

    // several documents were collected: handle each one in place
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striplinks($this->results[$idx]);
        if ($this->expandlinks)
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
        $idx++;
    }

    return true;
}
549
550/*======================================================================*\
551 Function: submittext
552 Purpose: grab text from a form submission
553 Input: $URI where you are submitting from
554 Output: $this->results the text from the web page
555\*======================================================================*/
556
function submittext($URI, $formvars = "", $formfiles = "")
{
    // Submits the form and replaces $this->results with the text of the
    // response(s). Returns false when submit() fails, true otherwise.
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // expansion is done against the address we finally ended up at
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (!is_array($this->results))
    {
        // a single document came back
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
        return true;
    }

    // several documents were collected: handle each one in place
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        if ($this->expandlinks)
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
        $idx++;
    }

    return true;
}
583
584
585
586/*======================================================================*\
587 Function: set_submit_multipart
588 Purpose: Set the form submission content type to
589 multipart/form-data
590\*======================================================================*/
function set_submit_multipart()
{
    // submit() passes this value on as the Content-type of the request;
    // for this type the request helpers also append the MIME boundary.
    $submit_type = "multipart/form-data";
    $this->_submit_type = $submit_type;
}
595
596
597/*======================================================================*\
598 Function: set_submit_normal
599 Purpose: Set the form submission content type to
600 application/x-www-form-urlencoded
601\*======================================================================*/
function set_submit_normal()
{
    // submit() passes this value on as the Content-type of the request;
    // it is also the class default for $_submit_type.
    $submit_type = "application/x-www-form-urlencoded";
    $this->_submit_type = $submit_type;
}
606
607
608
609
610/*======================================================================*\
611 Private functions
612\*======================================================================*/
613
614
615/*======================================================================*\
616 Function: _striplinks
617 Purpose: strip the hyperlinks from an html document
618 Input: $document document to strip.
619 Output: $match an array of the links
620\*======================================================================*/
621
function _striplinks($document)
{
    /*
     * Extracts the href values of all <a> tags in $document.
     *
     * Returns an array of link strings: first every quoted href in document
     * order, then every unquoted one. An empty array is returned when the
     * document contains no links. Values that are empty() (including "0")
     * are dropped.
     */
    preg_match_all("'<\s*a\s.*?href\s*=\s*            # find <a href=
                    ([\"\'])?                        # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))     # if quote found, match up to next matching
                                                     # quote, otherwise match up to next space
                    'isx",$document,$links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones
    $match = array();
    foreach (array(2, 3) as $group)
    {
        if (!isset($links[$group]) || !is_array($links[$group]))
            continue;
        foreach ($links[$group] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
648
649/*======================================================================*\
650 Function: _stripform
651 Purpose: strip the form elements from an html document
652 Input: $document document to strip.
653 Output: $match an array of the links
654\*======================================================================*/
655
function _stripform($document)
{
    // Collects every form-related tag (form, input, select, textarea,
    // option - an option together with the text that follows it) and
    // returns them joined by CRLF as a single string.
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($pattern, $document, $found);

    // only the full matches are of interest
    return implode("\r\n", $found[0]);
}
666
667
668
669/*======================================================================*\
670 Function: _striptext
671 Purpose: strip the text from an html document
672 Input: $document document to strip.
673 Output: $text the resulting text
674\*======================================================================*/
675
function _striptext($document)
{
    /*
     * Strips scripts, tags and runs of whitespace after a line break from
     * $document and replaces a fixed list of HTML entities.
     *
     * Returns the resulting text. Entity replacements are single bytes
     * produced with chr(), so the umlaut and sharp-s entries no longer
     * depend on the encoding this source file is stored in.
     */

    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.

    // pattern => replacement, applied in this order
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",         // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",         // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",      // strip out white space
        "'&(quot|#34|#034|#x22);'i"      => "\"",       // replace html entities
        "'&(amp|#38|#038|#x26);'i"       => "&",        // added hexadecimal values
        "'&(lt|#60|#060|#x3c);'i"        => "<",
        "'&(gt|#62|#062|#x3e);'i"        => ">",
        "'&(nbsp|#160|#xa0);'i"          => " ",
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
        "'&(reg|#174);'i"                => chr(174),
        "'&(deg|#176);'i"                => chr(176),
        "'&(#39|#039|#x27);'"            => chr(39),
        "'&(euro|#8364);'i"              => chr(128),   // europe
        "'&a(uml|UML);'"                 => chr(228),   // german: a umlaut
        "'&o(uml|UML);'"                 => chr(246),   // o umlaut
        "'&u(uml|UML);'"                 => chr(252),   // u umlaut
        "'&A(uml|UML);'"                 => chr(196),   // A umlaut
        "'&O(uml|UML);'"                 => chr(214),   // O umlaut
        "'&U(uml|UML);'"                 => chr(220),   // U umlaut
        "'&szlig;'i"                     => chr(223),   // sharp s
    );

    $text = preg_replace(array_keys($rules), array_values($rules), $document);

    return $text;
}
736
737/*======================================================================*\
738 Function: _expandlinks
739 Purpose: expand each link into a fully qualified URL
740 Input: $links the links to qualify
741 $URI the full URI to get the base from
742 Output: $expandedLinks the expanded links
743\*======================================================================*/
744
function _expandlinks($links,$URI)
{
    /*
     * Qualifies $links (a string or an array of strings) against $URI.
     *
     * The base directory is $URI without its query string, without a
     * trailing "name.ext" segment and without a trailing slash; the root is
     * the scheme and host of that base. Root-relative links are prefixed
     * with the root, other relative links with the base directory, and
     * "/./" and "/dir/../" segments are collapsed once.
     *
     * Returns the expanded link(s) in the same shape as $links.
     */

    // everything in front of the query string
    $base = preg_match("/^[^\?]+/", $URI, $found) ? $found[0] : '';

    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $base);
    $base = preg_replace("|/$|", "", $base);

    $base_parts = parse_url($base);
    if (!is_array($base_parts))
        $base_parts = array();
    $match_root =
        (isset($base_parts["scheme"]) ? $base_parts["scheme"] : '')."://".
        (isset($base_parts["host"]) ? $base_parts["host"] : '');

    $search = array( "|^http://".preg_quote($this->host)."|i",
                     "|^(\/)|i",
                     // absolute http(s) and mailto links are left alone;
                     // without the "s?" an https link would be treated as
                     // relative and get the base prepended
                     "|^(?!https?://)(?!mailto:)|i",
                     "|/\./|",
                     "|/[^\/]+/\.\./|"
                   );

    $replace = array( "",
                      $match_root."/",
                      $base."/",
                      "/",
                      "/"
                    );

    $expandedLinks = preg_replace($search, $replace, $links);

    return $expandedLinks;
}
774
775/*======================================================================*\
776 Function: _httprequest
777 Purpose: go get the http data from the server
778 Input: $url the url to fetch
779 $fp the current open file pointer
780 $URI the full URI
781 $body body contents to send if any (POST)
782 Output:
783\*======================================================================*/
784
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    /*
     * Sends one HTTP request over the open stream $fp and reads the reply.
     *
     *   $url          request target for the request line ("/" when empty)
     *   $fp           open stream to write the request to / read from
     *   $URI          the full URI (used to qualify redirects and frames)
     *   $http_method  request method, e.g. GET or POST
     *   $content_type Content-type header value, if any
     *   $body         request body, if any
     *
     * Fills $this->headers, $this->status, $this->response_code,
     * $this->_redirectaddr, $this->_frameurls and $this->results.
     * Returns false (with $this->status = -100) when _check_timeout()
     * reports a read timeout, true otherwise.
     */
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : '';
    if (empty($url))
        $url = "/";

    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if (!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    // a Host raw header supplied by the caller takes precedence
    if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if (!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if (!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if (!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if (!empty($this->cookies))
    {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal)
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        if (count($cookie_pairs) > 0)
            $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
    }
    if (!empty($this->rawheaders))
    {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first header, so raw headers are
        // also sent on follow-up requests (redirects, frames)
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if (!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if (!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if (!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    $request = $headers.$body;
    fwrite($fp, $request, strlen($request));

    $this->_redirectaddr = false;
    // reset instead of unset() so the property stays defined even when the
    // server sends no header lines at all
    $this->headers = array();

    while ($currentHeader = fgets($fp, $this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status = -100;
            return false;
        }

        // an empty line ends the header section
        if ($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect
        // (whitespace after the colon is optional)
        if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($currentHeader), $matches))
        {
            $location = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $location))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $location))
                    $this->_redirectaddr .= "/".$location;
                else
                    $this->_redirectaddr .= $location;
            }
            else
                $this->_redirectaddr = $location;
        }

        if (preg_match("|^HTTP/|", $currentHeader))
        {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
            {
                $this->status = $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks of $this->maxlength until the stream is drained
    $results = '';
    while (true)
    {
        $_data = fread($fp, $this->maxlength);
        if ($_data === false || strlen($_data) == 0)
            break;
        $results .= $_data;
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
    {
        // $this->results may still be a string (the class default or an
        // earlier, unframed fetch); appending with [] to a string is a
        // fatal error, so start a fresh list first
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for ($x = 0; $x < count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
938
939/*======================================================================*\
940 Function: _httpsrequest
941 Purpose: go get the https data from the server using curl
942 Input: $url the url to fetch
943 $URI the full URI
944 $body body contents to send if any (POST)
945 Output:
946\*======================================================================*/
947
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    /*
     * Performs one https request by running the cURL binary at
     * $this->curl_path and parses the header dump it writes.
     *
     *   $url          request target (unused by cURL, kept for symmetry
     *                 with _httprequest())
     *   $URI          the full URI handed to cURL
     *   $http_method  request method (not passed to cURL; a body makes
     *                 cURL's -d switch send a POST)
     *   $content_type Content-type header value, if any
     *   $body         request body, if any
     *
     * Fills $this->headers, $this->response_code, $this->_redirectaddr,
     * $this->_frameurls and $this->results. Returns false with a message in
     * $this->error when the temporary header file cannot be created or cURL
     * exits with a non-zero status, true otherwise.
     */
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    if (empty($url))
        $url = "/";

    // cURL connects to whatever $URI says, so the authority used for the
    // Host header and for relative redirects must come from $URI as well;
    // $this->port still holds the plain-http default when $URI has no port
    $authority = $this->host;
    if (!empty($URI_PARTS["port"]))
        $authority .= ":".$URI_PARTS["port"];

    // GET ... header not needed for curl
    //$headers[] = $http_method." ".$url." ".$this->_httpversion;
    if (!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if (!empty($this->host))
        $headers[] = "Host: ".$authority;
    if (!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if (!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if (!empty($this->cookies))
    {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal)
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        if (count($cookie_pairs) > 0)
            $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
    }
    if (!empty($this->rawheaders))
    {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if (!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if (!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: Basic ".base64_encode($this->user.":".$this->pass);

    // Every value that reaches the shell is quoted with escapeshellarg();
    // headers, body and URI can all carry remote or user supplied data.
    $cmdline_params = '';
    foreach ($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if (!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if ($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

    // cURL dumps the response headers into this file (-D)
    $headerfile = tempnam($this->temp_dir, "sno");
    if ($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    $results = array();
    $return = 0;
    exec(escapeshellcmd($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI), $results, $return);

    if ($return)
    {
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        // do not leave the temporary header file behind
        if (file_exists($headerfile))
            unlink($headerfile);
        return false;
    }

    $results = implode("\r\n", $results);

    $result_headers = file($headerfile);
    if (!is_array($result_headers))
        $result_headers = array();

    $this->_redirectaddr = false;
    // reset instead of unset() so the property stays defined
    $this->headers = array();

    foreach ($result_headers as $header_line)
    {
        // if a header begins with Location: or URI:, set the redirect
        // (whitespace after the colon is optional)
        if (preg_match("/^(Location:|URI:)\s*(.*)/i", chop($header_line), $matches))
        {
            $location = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $location))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$authority;
                // eliminate double slash
                if (!preg_match("|^/|", $location))
                    $this->_redirectaddr .= "/".$location;
                else
                    $this->_redirectaddr .= $location;
            }
            else
                $this->_redirectaddr = $location;
        }

        if (preg_match("|^HTTP/|", $header_line))
            $this->response_code = $header_line;

        $this->headers[] = $header_line;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
    {
        // $this->results may still be a string (the class default or an
        // earlier, unframed fetch); appending with [] to a string is a
        // fatal error, so start a fresh list first
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for ($x = 0; $x < count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    unlink($headerfile);

    return true;
}
1087
1088/*======================================================================*\
1089 Function: setcookies()
1090 Purpose: set cookies for a redirection
1091\*======================================================================*/
1092
function setcookies()
{
	/*
	 * Scans the response headers stored in $this->headers for Set-Cookie
	 * lines and records each cookie as name => url-decoded value in
	 * $this->cookies. Only the leading name=value pair is kept; cookie
	 * attributes after the first ';' are ignored.
	 */

	// the fetch routines unset() $this->headers before refilling it, so it
	// may be missing entirely when a response carried no header lines
	if(empty($this->headers) || !is_array($this->headers))
		return;

	foreach($this->headers as $header)
	{
		// Header lines are stored with their trailing line terminator, so
		// CR/LF are excluded from the value to keep them out of cookies
		// that have no attributes. Whitespace after the colon is optional.
		if(preg_match('/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i', $header, $match))
			$this->cookies[$match[1]] = urldecode($match[2]);
	}
}
1101
1102
1103/*======================================================================*\
1104 Function: _check_timeout
1105 Purpose: checks whether timeout has occurred
1106 Input: $fp file pointer
1107\*======================================================================*/
1108
function _check_timeout($fp)
{
	/*
	 * Reports whether the stream $fp has timed out. The check is only
	 * performed when a positive read_timeout is configured; on a timeout
	 * the timed_out flag on this object is raised as well.
	 * Returns true on timeout, false otherwise.
	 */

	// no read timeout configured: nothing to check
	if(!($this->read_timeout > 0))
		return false;

	$stream_info = socket_get_status($fp);
	if(!$stream_info["timed_out"])
		return false;

	$this->timed_out = true;
	return true;
}
1120
1121/*======================================================================*\
1122 Function: _connect
1123 Purpose: make a socket connection
1124 Input: $fp file pointer
1125\*======================================================================*/
1126
function _connect(&$fp)
{
	/*
	 * Opens a socket either to the configured proxy (when both proxy_host
	 * and proxy_port are set) or directly to host:port.
	 *
	 * Input:   $fp  receives the opened socket (passed by reference)
	 * Output:  true on success; false on failure, with $this->status set
	 *          to the error number reported by fsockopen() and
	 *          $this->error set to a descriptive message.
	 */
	if(!empty($this->proxy_host) && !empty($this->proxy_port))
	{
		$this->_isproxy = true;

		$host = $this->proxy_host;
		$port = $this->proxy_port;
	}
	else
	{
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
	if($fp)
	{
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	switch($errno)
	{
		// each case needs its own break: without it every failure fell
		// through to the default branch and the specific message was lost
		case -3:
			$this->error="socket creation failed (-3)";
			break;
		case -4:
			$this->error="dns lookup failure (-4)";
			break;
		case -5:
			$this->error="connection refused or timed out (-5)";
			break;
		default:
			$this->error="connection failed (".$errno.")";
			break;
	}
	return false;
}
1174/*======================================================================*\
1175 Function: _disconnect
1176 Purpose: disconnect a socket connection
1177 Input: $fp file pointer
1178\*======================================================================*/
1179
function _disconnect($fp)
{
	// close the given file pointer and hand back fclose()'s result
	$closed = fclose($fp);
	return $closed;
}
1184
1185
1186/*======================================================================*\
1187 Function: _prepare_post_body
1188 Purpose: Prepare post body according to encoding type
1189 Input: $formvars - form variables
1190 $formfiles - form upload files
1191 Output: post body
1192\*======================================================================*/
1193
function _prepare_post_body($formvars, $formfiles)
{
	/*
	 * Builds the request body for a form submission, encoded according
	 * to $this->_submit_type.
	 *
	 * Input:   $formvars   field name => value; a value that is an array
	 *                      or object is sent as repeated "name[]" fields
	 *          $formfiles  field name => file path or array of file paths
	 *                      (only used for multipart/form-data)
	 * Output:  the encoded body string; nothing (null) when there are no
	 *          variables and no files; an empty string for a submit type
	 *          other than the two handled below.
	 * Side effect: for multipart/form-data a fresh boundary is stored in
	 *          $this->_mime_boundary.
	 */
	settype($formvars, "array");
	settype($formfiles, "array");
	$postdata = '';

	if (count($formvars) == 0 && count($formfiles) == 0)
		return;

	switch ($this->_submit_type) {
		case "application/x-www-form-urlencoded":
			// foreach instead of each(): each() no longer exists in PHP 8
			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
					}
				} else
					$postdata .= urlencode($key)."=".urlencode($val)."&";
			}
			break;

		case "multipart/form-data":
			$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						// plain "[]" suffix, matching the urlencoded branch;
						// "\[" inside double quotes kept the backslash in
						// the field name that was sent
						$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ($formfiles as $field_name => $file_names) {
				settype($file_names, "array");
				foreach ($file_names as $file_name) {
					if (!is_readable($file_name)) continue;

					// Read each file into a fresh variable so one upload
					// never carries the contents of the previous ones;
					// this also copes with zero-length files.
					$file_content = file_get_contents($file_name);
					if ($file_content === false) continue;
					$base_name = basename($file_name);

					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			// closing boundary
			$postdata .= "--".$this->_mime_boundary."--\r\n";
			break;
	}

	return $postdata;
}
1258}
1259endif;
1260
1261?>
Note: See TracBrowser for help on using the repository browser.