source: trunk/www.guidonia.net/wp/wp-includes/class-snoopy.php@ 44

Last change on this file since 44 was 44, checked in by luciano, 15 years ago
File size: 36.6 KB
Line 
1<?php
2if ( !in_array('Snoopy', get_declared_classes() ) ) :
3/*************************************************
4
5Snoopy - the PHP net client
6Author: Monte Ohrt <monte@ispi.net>
7Copyright (c): 1999-2008 New Digital Group, all rights reserved
8Version: 1.2.4
9
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23
24You may contact the author of Snoopy by e-mail at:
25monte@ohrt.com
26
27The latest version of Snoopy can be obtained from:
28http://snoopy.sourceforge.net/
29
30*************************************************/
31
32class Snoopy
33{
34 /**** Public variables ****/
35
36 /* user definable vars */
37
38 var $host = "www.php.net"; // host name we are connecting to
39 var $port = 80; // port we are connecting to
40 var $proxy_host = ""; // proxy host to use
41 var $proxy_port = ""; // proxy port to use
42 var $proxy_user = ""; // proxy user to use
43 var $proxy_pass = ""; // proxy password to use
44
45 var $agent = "Snoopy v1.2.4"; // agent we masquerade as
46 var $referer = ""; // referer info to pass
47 var $cookies = array(); // array of cookies to pass
48 // $cookies["username"]="joe";
49 var $rawheaders = array(); // array of raw headers to send
50 // $rawheaders["Content-type"]="text/html";
51
52 var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
53 var $lastredirectaddr = ""; // contains address of last redirected address
54 var $offsiteok = true; // allows redirection off-site
55 var $maxframes = 0; // frame content depth maximum. 0 = disallow
56 var $expandlinks = true; // expand links to fully qualified URLs.
57 // this only applies to fetchlinks()
58 // submitlinks(), and submittext()
59 var $passcookies = true; // pass set cookies back through redirects
60 // NOTE: this currently does not respect
61 // dates, domains or paths.
62
63 var $user = ""; // user for http authentication
64 var $pass = ""; // password for http authentication
65
66 // http accept types
67 var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
68
69 var $results = ""; // where the content is put
70
71 var $error = ""; // error messages sent here
72 var $response_code = ""; // response code returned from server
73 var $headers = array(); // headers returned from server sent here
74 var $maxlength = 500000; // max return data length (body)
75 var $read_timeout = 0; // timeout on read operations, in seconds
76 // supported only since PHP 4 Beta 4
77 // set to 0 to disallow timeouts
78 var $timed_out = false; // if a read operation timed out
79 var $status = 0; // http request status
80
81 var $temp_dir = "/tmp"; // temporary directory that the webserver
82 // has permission to write to.
83 // under Windows, this should be C:\temp
84
85 var $curl_path = "/usr/local/bin/curl";
86 // Snoopy will use cURL for fetching
87 // SSL content if a full system path to
88 // the cURL binary is supplied here.
89 // set to false if you do not have
90 // cURL installed. See http://curl.haxx.se
91 // for details on installing cURL.
92 // Snoopy does *not* use the cURL
93 // library functions built into php,
94 // as these functions are not stable
95 // as of this Snoopy release.
96
97 /**** Private variables ****/
98
99 var $_maxlinelen = 4096; // max line length (headers)
100
101 var $_httpmethod = "GET"; // default http request method
102 var $_httpversion = "HTTP/1.0"; // default http request version
103 var $_submit_method = "POST"; // default submit method
104 var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
105 var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
106 var $_redirectaddr = false; // will be set if page fetched is a redirect
107 var $_redirectdepth = 0; // increments on an http redirect
108 var $_frameurls = array(); // frame src urls
109 var $_framedepth = 0; // increments on frame depth
110
111 var $_isproxy = false; // set if using a proxy server
112 var $_fp_timeout = 30; // timeout for socket connection
113
114/*======================================================================*\
115 Function: fetch
116 Purpose: fetch the contents of a web page
117 (and possibly other protocols in the
118 future like ftp, nntp, gopher, etc.)
119 Input: $URI the location of the page to fetch
120 Output: $this->results the output text from the fetch
121\*======================================================================*/
122
/**
 * Fetch the contents of a web page over http or https.
 *
 * The response body ends up in $this->results (a string, or an array of
 * strings once framed content is involved). Redirects are followed up to
 * $this->maxredirs and frames are fetched up to $this->maxframes.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false when the protocol is unsupported, the connection could
 *              not be opened, or (https) no usable cURL binary is configured;
 *              true otherwise
 */
function fetch($URI)
{
	$URI_PARTS = parse_url($URI);
	// parse_url() returns false for seriously malformed URLs
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];
	if (empty($URI_PARTS["query"]))
		$URI_PARTS["query"] = '';
	if (empty($URI_PARTS["path"]))
		$URI_PARTS["path"] = '';
	$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

	// through a proxy the entire URI is sent; otherwise only path + query
	if ($this->_isproxy)
		$target = $URI;
	else
		$target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

	switch (strtolower($scheme))
	{
		case "http":
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if (!$this->_connect($fp))
				return false;
			$this->_httprequest($target, $fp, $URI, $this->_httpmethod);
			$this->_disconnect($fp);
			break;

		case "https":
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			$this->_httpsrequest($target, $URI, $this->_httpmethod);
			break;

		default:
			// not a valid protocol (double quotes so that \n is a real newline)
			$this->error = 'Invalid protocol "'.$scheme.'"'."\n";
			return false;
	}

	/* url was redirected: follow it unless we've hit the max depth */
	if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
	{
		// "same site" means http or https on exactly this host; the lookahead
		// stops "host.evil.com" from passing as "host"
		$same_site = preg_match("~^https?://".preg_quote($this->host, "~")."(?=[:/?#]|$)~i", $this->_redirectaddr);
		if ($same_site || $this->offsiteok)
		{
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			$this->fetch($this->_redirectaddr);
		}
	}

	/* fetch any frame sources collected by the request, up to maxframes */
	if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// work on a copy: the nested fetch() calls refill $this->_frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ($frameurls as $frameurl)
		{
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
261
262/*======================================================================*\
263 Function: submit
264 Purpose: submit an http form
265 Input: $URI the location to post the data
266 $formvars the formvars to use.
267 format: $formvars["var"] = "val";
268 $formfiles an array of files to submit
269 format: $formfiles["var"] = "/dir/filename.ext";
270 Output: $this->results the text output from the post
271\*======================================================================*/
272
/**
 * Submit an http form.
 *
 * The response body ends up in $this->results. Redirects are followed up to
 * $this->maxredirs: a redirect target that contains a query string is
 * fetched with a plain request, any other target is re-submitted with the
 * same form data.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the form variables, $formvars["var"] = "val"
 * @param mixed  $formfiles files to submit, $formfiles["var"] = "/dir/filename.ext"
 * @return bool false when the protocol is unsupported, the connection could
 *              not be opened, or (https) no usable cURL binary is configured;
 *              true otherwise
 */
function submit($URI, $formvars="", $formfiles="")
{
	$postdata = $this->_prepare_post_body($formvars, $formfiles);

	$URI_PARTS = parse_url($URI);
	// parse_url() returns false for seriously malformed URLs
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];
	if (empty($URI_PARTS["query"]))
		$URI_PARTS["query"] = '';
	if (empty($URI_PARTS["path"]))
		$URI_PARTS["path"] = '';
	$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

	// through a proxy the entire URI is sent; otherwise only path + query
	if ($this->_isproxy)
		$target = $URI;
	else
		$target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

	switch (strtolower($scheme))
	{
		case "http":
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if (!$this->_connect($fp))
				return false;
			$this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			$this->_disconnect($fp);
			break;

		case "https":
			if (!$this->curl_path)
				return false;
			if (function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $URI_PARTS["host"];
			if (!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			$this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			break;

		default:
			// not a valid protocol (double quotes so that \n is a real newline)
			$this->error = 'Invalid protocol "'.$scheme.'"'."\n";
			return false;
	}

	/* url was redirected: follow it unless we've hit the max depth */
	if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
	{
		// only expand targets that are not already absolute http(s) URLs;
		// an http -> https redirect must not be glued onto the old base
		if (!preg_match("|^https?://|i", $this->_redirectaddr))
			$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$URI_PARTS["host"]);

		// "same site" means http or https on exactly this host; the lookahead
		// stops "host.evil.com" from passing as "host"
		$same_site = preg_match("~^https?://".preg_quote($this->host, "~")."(?=[:/?#]|$)~i", $this->_redirectaddr);
		if ($same_site || $this->offsiteok)
		{
			$this->_redirectdepth++;
			$this->lastredirectaddr = $this->_redirectaddr;
			if (strpos($this->_redirectaddr, "?") > 0)
				$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
			else
				$this->submit($this->_redirectaddr, $formvars, $formfiles);
		}
	}

	/* fetch any frame sources collected by the request, up to maxframes */
	if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// work on a copy: the nested fetch() calls refill $this->_frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		foreach ($frameurls as $frameurl)
		{
			if ($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
427
428/*======================================================================*\
429 Function: fetchlinks
430 Purpose: fetch the links from a web page
431 Input: $URI where you are fetching from
432 Output: $this->results an array of the URLs
433\*======================================================================*/
434
/**
 * Fetch a page and reduce $this->results to the links it contains.
 *
 * When the fetch produced several documents (framed content),
 * $this->results is an array with one list of links per document;
 * otherwise it is a single list of links. Links are expanded against the
 * fetched URI (or the last redirect target) when $this->expandlinks is set.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of the underlying fetch()
 */
function fetchlinks($URI)
{
	if (!$this->fetch($URI))
		return false;

	// relative links must be resolved against where we actually ended up
	if ($this->lastredirectaddr)
		$URI = $this->lastredirectaddr;

	if (is_array($this->results))
	{
		$pages = count($this->results);
		for ($x = 0; $x < $pages; $x++)
		{
			$this->results[$x] = $this->_striplinks($this->results[$x]);
			// expand page by page: _expandlinks() cannot process an array of arrays
			if ($this->expandlinks)
				$this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
		}
	}
	else
	{
		$this->results = $this->_striplinks($this->results);
		if ($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $URI);
	}

	return true;
}
456
457/*======================================================================*\
458 Function: fetchform
459 Purpose: fetch the form elements from a web page
460 Input: $URI where you are fetching from
461 Output: $this->results the resulting html form
462\*======================================================================*/
463
/**
 * Fetch a page and reduce $this->results to its form elements.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of the underlying fetch()
 */
function fetchform($URI)
{
	if (!$this->fetch($URI))
		return false;

	if (!is_array($this->results))
	{
		// single document
		$this->results = $this->_stripform($this->results);
		return true;
	}

	// several documents (framed content): strip each one in place
	for ($page = 0; $page < count($this->results); $page++)
	{
		$this->results[$page] = $this->_stripform($this->results[$page]);
	}

	return true;
}
483
484
485/*======================================================================*\
486 Function: fetchtext
487 Purpose: fetch the text from a web page, stripping the links
488 Input: $URI where you are fetching from
489 Output: $this->results the text from the web page
490\*======================================================================*/
491
/**
 * Fetch a page and reduce $this->results to its plain text.
 *
 * @param string $URI where you are fetching from
 * @return bool the result of the underlying fetch()
 */
function fetchtext($URI)
{
	if (!$this->fetch($URI))
		return false;

	if (!is_array($this->results))
	{
		// single document
		$this->results = $this->_striptext($this->results);
		return true;
	}

	// several documents (framed content): strip each one in place
	for ($page = 0; $page < count($this->results); $page++)
	{
		$this->results[$page] = $this->_striptext($this->results[$page]);
	}

	return true;
}
508
509/*======================================================================*\
510 Function: submitlinks
511 Purpose: grab links from a form submission
512 Input: $URI where you are submitting from
513 Output: $this->results an array of the links from the post
514\*======================================================================*/
515
/**
 * Submit a form and reduce $this->results to the links in the response.
 *
 * @param string $URI       where you are submitting to
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool the result of the underlying submit()
 */
function submitlinks($URI, $formvars="", $formfiles="")
{
	if (!$this->submit($URI, $formvars, $formfiles))
		return false;

	// links are resolved against the last redirect target, if there was one
	if ($this->lastredirectaddr)
		$URI = $this->lastredirectaddr;

	if (!is_array($this->results))
	{
		$this->results = $this->_striplinks($this->results);
		if ($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $URI);
		return true;
	}

	// several documents (framed content): process each one in place
	for ($page = 0; $page < count($this->results); $page++)
	{
		$this->results[$page] = $this->_striplinks($this->results[$page]);
		if ($this->expandlinks)
			$this->results[$page] = $this->_expandlinks($this->results[$page], $URI);
	}

	return true;
}
542
543/*======================================================================*\
544 Function: submittext
545 Purpose: grab text from a form submission
546 Input: $URI where you are submitting from
547 Output: $this->results the text from the web page
548\*======================================================================*/
549
/**
 * Submit a form and reduce $this->results to the text of the response.
 *
 * @param string $URI       where you are submitting to
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool the result of the underlying submit()
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
	if (!$this->submit($URI, $formvars, $formfiles))
		return false;

	// expansion uses the last redirect target as base, if there was one
	if ($this->lastredirectaddr)
		$URI = $this->lastredirectaddr;

	if (!is_array($this->results))
	{
		$this->results = $this->_striptext($this->results);
		if ($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $URI);
		return true;
	}

	// several documents (framed content): process each one in place
	for ($page = 0; $page < count($this->results); $page++)
	{
		$this->results[$page] = $this->_striptext($this->results[$page]);
		if ($this->expandlinks)
			$this->results[$page] = $this->_expandlinks($this->results[$page], $URI);
	}

	return true;
}
576
577
578
579/*======================================================================*\
580 Function: set_submit_multipart
581 Purpose: Set the form submission content type to
582 multipart/form-data
583\*======================================================================*/
/**
 * Make later submissions use the multipart/form-data content type.
 */
function set_submit_multipart()
{
	$multipart_type = "multipart/form-data";
	$this->_submit_type = $multipart_type;
}
588
589
590/*======================================================================*\
591 Function: set_submit_normal
592 Purpose: Set the form submission content type to
593 application/x-www-form-urlencoded
594\*======================================================================*/
/**
 * Make later submissions use the application/x-www-form-urlencoded
 * content type.
 */
function set_submit_normal()
{
	$urlencoded_type = "application/x-www-form-urlencoded";
	$this->_submit_type = $urlencoded_type;
}
599
600
601
602
603/*======================================================================*\
604 Private functions
605\*======================================================================*/
606
607
608/*======================================================================*\
609 Function: _striplinks
610 Purpose: strip the hyperlinks from an html document
611 Input: $document document to strip.
612 Output: $match an array of the links
613\*======================================================================*/
614
/**
 * Extract the href targets of all <a> tags in an html document.
 *
 * Quoted href values are listed first, followed by unquoted ones; within
 * each group the document order is kept. Values that are empty() are
 * skipped.
 *
 * @param string $document document to strip
 * @return array the links found; an empty array when there are none
 */
function _striplinks($document)
{
	preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
					([\"\'])?					# find single or double quote
					(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
												# quote, otherwise match up to next space
					'isx",$document,$links);

	// always hand back an array, even when nothing matched
	$match = array();

	// group 2 holds quoted values, group 3 unquoted ones; for every <a> tag
	// exactly one of the two is non-empty, so catenate the non-empty entries
	foreach (array($links[2], $links[3]) as $group)
	{
		foreach ($group as $val)
		{
			if (!empty($val))
				$match[] = $val;
		}
	}

	// return the links
	return $match;
}
641
642/*======================================================================*\
643 Function: _stripform
644 Purpose: strip the form elements from an html document
645 Input: $document document to strip.
646 Output: $match the form elements found, joined with CRLF into one string
647\*======================================================================*/
648
/**
 * Extract the form-related tags (form, input, select, textarea, option)
 * from an html document.
 *
 * @param string $document document to strip
 * @return string the matched elements, joined with "\r\n"
 */
function _stripform($document)
{
	$form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

	preg_match_all($form_pattern, $document, $found);

	// index 0 holds the full text of every match; put one element per line
	return implode("\r\n", $found[0]);
}
659
660
661
662/*======================================================================*\
663 Function: _striptext
664 Purpose: strip the text from an html document
665 Input: $document document to strip.
666 Output: $text the resulting text
667\*======================================================================*/
668
/**
 * Reduce an html document to its text.
 *
 * Script blocks and tags are removed, whitespace following a line break is
 * collapsed, and a fixed list of common entities is decoded. The rules are
 * applied one after another in the order listed.
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
function _striptext($document)
{
	// pattern => replacement; entities are listed one by one, only some of
	// the more common ones are included
	$rules = array(
		"'<script[^>]*?>.*?</script>'si" => "",			// strip out javascript
		"'<[\/\!]*?[^<>]*?>'si"          => "",			// strip out html tags
		"'([\r\n])[\s]+'"                => "\\1",		// strip out white space
		"'&(quot|#34|#034|#x22);'i"      => "\"",		// replace html entities
		"'&(amp|#38|#038|#x26);'i"       => "&",			// added hexadecimal values
		"'&(lt|#60|#060|#x3c);'i"        => "<",
		"'&(gt|#62|#062|#x3e);'i"        => ">",
		"'&(nbsp|#160|#xa0);'i"          => " ",
		"'&(iexcl|#161);'i"              => chr(161),
		"'&(cent|#162);'i"               => chr(162),
		"'&(pound|#163);'i"              => chr(163),
		"'&(copy|#169);'i"               => chr(169),
		"'&(reg|#174);'i"                => chr(174),
		"'&(deg|#176);'i"                => chr(176),
		"'&(#39|#039|#x27);'"            => chr(39),
		"'&(euro|#8364);'i"              => chr(128),	// europe
		"'&a(uml|UML);'"                 => "ä",			// german
		"'&o(uml|UML);'"                 => "ö",
		"'&u(uml|UML);'"                 => "ü",
		"'&A(uml|UML);'"                 => "Ä",
		"'&O(uml|UML);'"                 => "Ö",
		"'&U(uml|UML);'"                 => "Ü",
		"'&szlig;'i"                     => "ß",
	);

	return preg_replace(array_keys($rules), array_values($rules), $document);
}
729
730/*======================================================================*\
731 Function: _expandlinks
732 Purpose: expand each link into a fully qualified URL
733 Input: $links the links to qualify
734 $URI the full URI to get the base from
735 Output: $expandedLinks the expanded links
736\*======================================================================*/
737
/**
 * Expand links into fully qualified URLs.
 *
 * The base is $URI without its query string and without a trailing
 * "/file.ext" component. Links starting with "/" are resolved against the
 * base's scheme and host, other non-absolute links against the base itself;
 * "/./" and "/dir/../" segments are then collapsed once.
 *
 * @param string|array $links the link, or list of links, to qualify
 * @param string       $URI   the full URI to take the base from
 * @return string|array the expanded link(s), same shape as $links
 */
function _expandlinks($links,$URI)
{
	// everything up to the query string
	preg_match("/^[^\?]+/",$URI,$match);

	// drop a trailing "/file.ext"; the lookbehind keeps the second slash of
	// "://" from matching, which used to turn "http://example.com" into "http:"
	$match = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
	$match = preg_replace("|/$|","",$match);
	$match_part = parse_url($match);
	$match_root =
		$match_part["scheme"]."://".$match_part["host"];

	$search = array(
		// same-host absolute http links are reduced to their path first
		"|^http://".preg_quote($this->host, "|")."|i",
		// root-relative links
		"|^(\/)|i",
		// anything that is not already absolute (https links are absolute too)
		"|^(?!https?://)(?!mailto:)|i",
		"|/\./|",
		"|/[^\/]+/\.\./|"
	);

	$replace = array(
		"",
		$match_root."/",
		$match."/",
		"/",
		"/"
	);

	$expandedLinks = preg_replace($search,$replace,$links);

	return $expandedLinks;
}
767
768/*======================================================================*\
769 Function: _httprequest
770 Purpose: go get the http data from the server
771 Input: $url the url to fetch
772 $fp the current open file pointer
773 $URI the full URI
774 $body body contents to send if any (POST)
775 Output:
776\*======================================================================*/
777
/**
 * Send an http request over an open connection and read the response.
 *
 * Fills $this->headers, $this->status, $this->response_code and
 * $this->results, sets $this->_redirectaddr when the response redirects
 * (Location/URI header or meta refresh), and collects frame sources in
 * $this->_frameurls while the frame depth allows it.
 *
 * @param string   $url          what goes on the request line (path, or full URI via proxy)
 * @param resource $fp           the currently open connection
 * @param string   $URI          the full URI
 * @param string   $http_method  request method, e.g. "GET" or "POST"
 * @param string   $content_type Content-type header value, if any
 * @param string   $body         body contents to send, if any (POST)
 * @return bool false when a read timed out (status is then -100), true otherwise
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
	if ($this->passcookies && $this->_redirectaddr)
		$this->setcookies();

	$URI_PARTS = parse_url($URI);
	if (empty($url))
		$url = "/";

	$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
	if (!empty($this->agent))
		$headers .= "User-Agent: ".$this->agent."\r\n";
	// a Host supplied through rawheaders takes precedence
	if (!empty($this->host) && !isset($this->rawheaders['Host']))
	{
		$headers .= "Host: ".$this->host;
		if (!empty($this->port) && $this->port != 80)
			$headers .= ":".$this->port;
		$headers .= "\r\n";
	}
	if (!empty($this->accept))
		$headers .= "Accept: ".$this->accept."\r\n";
	if (!empty($this->referer))
		$headers .= "Referer: ".$this->referer."\r\n";
	if (!empty($this->cookies))
	{
		if (!is_array($this->cookies))
			$this->cookies = (array)$this->cookies;

		$cookie_pairs = array();
		foreach ($this->cookies as $cookieKey => $cookieVal)
			$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
		$headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
	}
	if (!empty($this->rawheaders))
	{
		if (!is_array($this->rawheaders))
			$this->rawheaders = (array)$this->rawheaders;
		// foreach always starts at the first element, so the raw headers are
		// sent again on follow-up requests (redirects, frames) as well
		foreach ($this->rawheaders as $headerKey => $headerVal)
			$headers .= $headerKey.": ".$headerVal."\r\n";
	}
	if (!empty($content_type))
	{
		$headers .= "Content-type: $content_type";
		if ($content_type == "multipart/form-data")
			$headers .= "; boundary=".$this->_mime_boundary;
		$headers .= "\r\n";
	}
	if (!empty($body))
		$headers .= "Content-length: ".strlen($body)."\r\n";
	if (!empty($this->user) || !empty($this->pass))
		$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

	// add proxy auth headers
	if (!empty($this->proxy_user))
		$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

	// blank line terminates the header section
	$headers .= "\r\n";

	// set the read timeout if needed
	if ($this->read_timeout > 0)
		socket_set_timeout($fp, $this->read_timeout);
	$this->timed_out = false;

	fwrite($fp,$headers.$body,strlen($headers.$body));

	$this->_redirectaddr = false;
	// reset rather than unset, so the property is an array even when the
	// server sends no headers at all
	$this->headers = array();

	while ($currentHeader = fgets($fp,$this->_maxlinelen))
	{
		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// a bare CRLF ends the response headers
		if ($currentHeader == "\r\n")
			break;

		// if a header begins with Location: or URI:, set the redirect;
		// whitespace after the colon is optional
		if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches) && $matches[2] != '')
		{
			// look for :// in the Location header to see if hostname is included
			if (!preg_match("|\:\/\/|",$matches[2]))
			{
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
				// eliminate double slash
				if (!preg_match("|^/|",$matches[2]))
					$this->_redirectaddr .= "/".$matches[2];
				else
					$this->_redirectaddr .= $matches[2];
			}
			else
				$this->_redirectaddr = $matches[2];
		}

		if (preg_match("|^HTTP/|",$currentHeader))
		{
			if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
			{
				$this->status= $status[1];
			}
			$this->response_code = $currentHeader;
		}

		$this->headers[] = $currentHeader;
	}

	// read the body until the connection yields no more data
	$results = '';
	while (true)
	{
		$_data = fread($fp, $this->maxlength);
		if ($_data === false || strlen($_data) == 0)
			break;
		$results .= $_data;
	}

	if ($this->read_timeout > 0 && $this->_check_timeout($fp))
	{
		$this->status=-100;
		return false;
	}

	// check if there is a redirect meta tag
	if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
	{
		$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
	}

	// have we hit our frame depth and is there frame src to fetch?
	if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
	{
		$this->results[] = $results;
		for ($x=0; $x<count($match[1]); $x++)
			$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
	}
	// have we already fetched framed content?
	elseif (is_array($this->results))
		$this->results[] = $results;
	// no framed content
	else
		$this->results = $results;

	return true;
}
931
932/*======================================================================*\
933 Function: _httpsrequest
934 Purpose: go get the https data from the server using curl
935 Input: $url the url to fetch
936 $URI the full URI
937 $body body contents to send if any (POST)
938 Output:
939\*======================================================================*/
940
941 function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
942 {
943 if($this->passcookies && $this->_redirectaddr)
944 $this->setcookies();
945
946 $headers = array();
947
948 $URI_PARTS = parse_url($URI);
949 if(empty($url))
950 $url = "/";
951 // GET ... header not needed for curl
952 //$headers[] = $http_method." ".$url." ".$this->_httpversion;
953 if(!empty($this->agent))
954 $headers[] = "User-Agent: ".$this->agent;
955 if(!empty($this->host))
956 if(!empty($this->port))
957 $headers[] = "Host: ".$this->host.":".$this->port;
958 else
959 $headers[] = "Host: ".$this->host;
960 if(!empty($this->accept))
961 $headers[] = "Accept: ".$this->accept;
962 if(!empty($this->referer))
963 $headers[] = "Referer: ".$this->referer;
964 if(!empty($this->cookies))
965 {
966 if(!is_array($this->cookies))
967 $this->cookies = (array)$this->cookies;
968
969 reset($this->cookies);
970 if ( count($this->cookies) > 0 ) {
971 $cookie_str = 'Cookie: ';
972 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
973 $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
974 }
975 $headers[] = substr($cookie_str,0,-2);
976 }
977 }
978 if(!empty($this->rawheaders))
979 {
980 if(!is_array($this->rawheaders))
981 $this->rawheaders = (array)$this->rawheaders;
982 while(list($headerKey,$headerVal) = each($this->rawheaders))
983 $headers[] = $headerKey.": ".$headerVal;
984 }
985 if(!empty($content_type)) {
986 if ($content_type == "multipart/form-data")
987 $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
988 else
989 $headers[] = "Content-type: $content_type";
990 }
991 if(!empty($body))
992 $headers[] = "Content-length: ".strlen($body);
993 if(!empty($this->user) || !empty($this->pass))
994 $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
995
996 for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
997 $safer_header = strtr( $headers[$curr_header], "\"", " " );
998 $cmdline_params .= " -H \"".$safer_header."\"";
999 }
1000
1001 if(!empty($body))
1002 $cmdline_params .= " -d \"$body\"";
1003
1004 if($this->read_timeout > 0)
1005 $cmdline_params .= " -m ".$this->read_timeout;
1006
1007 $headerfile = tempnam($temp_dir, "sno");
1008
1009 exec($this->curl_path." -k -D \"$headerfile\"".$cmdline_params." \"".escapeshellcmd($URI)."\"",$results,$return);
1010
1011 if($return)
1012 {
1013 $this->error = "Error: cURL could not retrieve the document, error $return.";
1014 return false;
1015 }
1016
1017
1018 $results = implode("\r\n",$results);
1019
1020 $result_headers = file("$headerfile");
1021
1022 $this->_redirectaddr = false;
1023 unset($this->headers);
1024
1025 for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1026 {
1027
1028 // if a header begins with Location: or URI:, set the redirect
1029 if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1030 {
1031 // get URL portion of the redirect
1032 preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1033 // look for :// in the Location header to see if hostname is included
1034 if(!preg_match("|\:\/\/|",$matches[2]))
1035 {
1036 // no host in the path, so prepend
1037 $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1038 // eliminate double slash
1039 if(!preg_match("|^/|",$matches[2]))
1040 $this->_redirectaddr .= "/".$matches[2];
1041 else
1042 $this->_redirectaddr .= $matches[2];
1043 }
1044 else
1045 $this->_redirectaddr = $matches[2];
1046 }
1047
1048 if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1049 $this->response_code = $result_headers[$currentHeader];
1050
1051 $this->headers[] = $result_headers[$currentHeader];
1052 }
1053
1054 // check if there is a redirect meta tag
1055
1056 if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1057 {
1058 $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
1059 }
1060
1061 // have we hit our frame depth and is there frame src to fetch?
1062 if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1063 {
1064 $this->results[] = $results;
1065 for($x=0; $x<count($match[1]); $x++)
1066 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1067 }
1068 // have we already fetched framed content?
1069 elseif(is_array($this->results))
1070 $this->results[] = $results;
1071 // no framed content
1072 else
1073 $this->results = $results;
1074
1075 unlink("$headerfile");
1076
1077 return true;
1078 }
1079
1080/*======================================================================*\
1081 Function: setcookies()
1082 Purpose: set cookies for a redirection
1083\*======================================================================*/
1084
function setcookies()
{
	// Copies every cookie announced by a Set-Cookie response header in
	// $this->headers into $this->cookies (name => url-decoded value), so
	// the cookies can be sent along when a redirect is followed.
	//
	// Input:   none (reads $this->headers)
	// Output:  none (updates $this->cookies)

	// The header list is unset before each response is parsed and is only
	// re-created when at least one header line arrives, so it may be
	// missing here; there is nothing to do in that case.
	if(empty($this->headers) || !is_array($this->headers))
		return;

	foreach($this->headers as $header)
	{
		// Header lines are stored with their line terminator, so CR/LF is
		// excluded from the value; otherwise a cookie without attributes
		// ("Set-Cookie: a=b\r\n") would capture the terminator as data.
		if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header, $match))
			$this->cookies[$match[1]] = urldecode($match[2]);
	}
}
1093
1094
1095/*======================================================================*\
1096 Function: _check_timeout
1097 Purpose: checks whether timeout has occurred
1098 Input: $fp file pointer
1099\*======================================================================*/
1100
function _check_timeout($fp)
{
	// Reports whether the stream $fp has timed out. The stream is only
	// inspected when a positive read_timeout is configured; on a timeout
	// $this->timed_out is set and true is returned, otherwise false.
	if(!($this->read_timeout > 0))
		return false;

	$stream_info = socket_get_status($fp);
	if(!$stream_info["timed_out"])
		return false;

	$this->timed_out = true;
	return true;
}
1112
1113/*======================================================================*\
1114 Function: _connect
1115 Purpose: make a socket connection
1116 Input: $fp file pointer
1117\*======================================================================*/
1118
function _connect(&$fp)
{
	// Opens a socket to the proxy (when both proxy_host and proxy_port are
	// set) or otherwise directly to $this->host:$this->port.
	//
	// Input:   $fp  passed by reference; receives the fsockopen() result
	// Output:  true when the connection was opened; false otherwise, with
	//          $this->status holding the fsockopen() error number and
	//          $this->error a matching message
	if(!empty($this->proxy_host) && !empty($this->proxy_port))
	{
		$this->_isproxy = true;

		$host = $this->proxy_host;
		$port = $this->proxy_port;
	}
	else
	{
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	if($fp = fsockopen(
				$host,
				$port,
				$errno,
				$errstr,
				$this->_fp_timeout
				))
	{
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	switch($errno)
	{
		// Each case must end with break: without it every branch falls
		// through to default and the specific message is overwritten.
		case -3:
			$this->error="socket creation failed (-3)";
			break;
		case -4:
			$this->error="dns lookup failure (-4)";
			break;
		case -5:
			$this->error="connection refused or timed out (-5)";
			break;
		default:
			$this->error="connection failed (".$errno.")";
			break;
	}
	return false;
}
1166/*======================================================================*\
1167 Function: _disconnect
1168 Purpose: disconnect a socket connection
1169 Input: $fp file pointer
1170\*======================================================================*/
1171
function _disconnect($fp)
{
	// Closes the given connection handle and reports fclose()'s result
	// (true on success, false on failure).
	$closed = fclose($fp);

	return $closed;
}
1176
1177
1178/*======================================================================*\
1179 Function: _prepare_post_body
1180 Purpose: Prepare post body according to encoding type
1181 Input: $formvars - form variables
1182 $formfiles - form upload files
1183 Output: post body
1184\*======================================================================*/
1185
function _prepare_post_body($formvars, $formfiles)
{
	// Builds the request body for a form submission, encoded according to
	// $this->_submit_type.
	//
	// Input:   $formvars   form variables (name => value, or name => list
	//                      of values, sent as "name[]")
	//          $formfiles  upload files (field name => path or list of
	//                      paths); only used for multipart/form-data
	// Output:  the encoded post body as a string; nothing (null) when both
	//          inputs are empty; an empty string for an unknown submit type
	//
	// For multipart/form-data a fresh boundary is generated and stored in
	// $this->_mime_boundary.
	settype($formvars, "array");
	settype($formfiles, "array");
	$postdata = '';

	if (count($formvars) == 0 && count($formfiles) == 0)
		return;

	switch ($this->_submit_type) {
		case "application/x-www-form-urlencoded":
			// foreach replaces the each() idiom, which no longer exists
			// in PHP 8.
			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
					}
				} else
					$postdata .= urlencode($key)."=".urlencode($val)."&";
			}
			break;

		case "multipart/form-data":
			$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
			$part_delimiter = "--".$this->_mime_boundary."\r\n";

			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= $part_delimiter;
						// The brackets are concatenated rather than written
						// as "\[\]": backslash-bracket is not an escape in
						// a double-quoted string, so the backslashes would
						// end up in the field name.
						$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= $part_delimiter;
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ($formfiles as $field_name => $file_names) {
				foreach ((array)$file_names as $file_name) {
					if (!is_readable($file_name)) continue;

					// file_get_contents() reads the file binary-safely and
					// copes with zero-length files, where
					// fread($fp, filesize(...)) fails.
					$file_content = file_get_contents($file_name);
					if ($file_content === false) continue;
					$base_name = basename($file_name);

					$postdata .= $part_delimiter;
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			// closing delimiter
			$postdata .= "--".$this->_mime_boundary."--\r\n";
			break;
	}

	return $postdata;
}
1248}
1249endif;
1250?>
Note: See TracBrowser for help on using the repository browser.