[44] | 1 | <?php
|
---|
| 2 |
|
---|
| 3 | /*************************************************
|
---|
| 4 |
|
---|
| 5 | Snoopy - the PHP net client
|
---|
| 6 | Author: Monte Ohrt <monte@ispi.net>
|
---|
| 7 | Copyright (c): 1999-2000 ispi, all rights reserved
|
---|
| 8 | Version: 1.01
|
---|
| 9 |
|
---|
| 10 | * This library is free software; you can redistribute it and/or
|
---|
| 11 | * modify it under the terms of the GNU Lesser General Public
|
---|
| 12 | * License as published by the Free Software Foundation; either
|
---|
| 13 | * version 2.1 of the License, or (at your option) any later version.
|
---|
| 14 | *
|
---|
| 15 | * This library is distributed in the hope that it will be useful,
|
---|
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
| 18 | * Lesser General Public License for more details.
|
---|
| 19 | *
|
---|
| 20 | * You should have received a copy of the GNU Lesser General Public
|
---|
| 21 | * License along with this library; if not, write to the Free Software
|
---|
| 22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
---|
| 23 |
|
---|
| 24 | You may contact the author of Snoopy by e-mail at:
|
---|
| 25 | monte@ispi.net
|
---|
| 26 |
|
---|
| 27 | Or, write to:
|
---|
| 28 | Monte Ohrt
|
---|
| 29 | CTO, ispi
|
---|
| 30 | 237 S. 70th suite 220
|
---|
| 31 | Lincoln, NE 68510
|
---|
| 32 |
|
---|
| 33 | The latest version of Snoopy can be obtained from:
|
---|
| 34 | http://snoopy.sourceforge.net/
|
---|
| 35 |
|
---|
| 36 | *************************************************/
|
---|
| 37 |
|
---|
| 38 | if ( !in_array('Snoopy', get_declared_classes() ) ) :
|
---|
| 39 | class Snoopy
|
---|
| 40 | {
|
---|
| 41 | /**** Public variables ****/
|
---|
| 42 |
|
---|
| 43 | /* user definable vars */
|
---|
| 44 |
|
---|
| 45 | var $host = "www.php.net"; // host name we are connecting to
|
---|
| 46 | var $port = 80; // port we are connecting to
|
---|
| 47 | var $proxy_host = ""; // proxy host to use
|
---|
| 48 | var $proxy_port = ""; // proxy port to use
|
---|
| 49 | var $proxy_user = ""; // proxy user to use
|
---|
| 50 | var $proxy_pass = ""; // proxy password to use
|
---|
| 51 |
|
---|
| 52 | var $agent = "Snoopy v1.2.3"; // agent we masquerade as
|
---|
| 53 | var $referer = ""; // referer info to pass
|
---|
| 54 | var $cookies = array(); // array of cookies to pass
|
---|
| 55 | // $cookies["username"]="joe";
|
---|
| 56 | var $rawheaders = array(); // array of raw headers to send
|
---|
| 57 | // $rawheaders["Content-type"]="text/html";
|
---|
| 58 |
|
---|
| 59 | var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
|
---|
| 60 | var $lastredirectaddr = ""; // contains address of last redirected address
|
---|
| 61 | var $offsiteok = true; // allows redirection off-site
|
---|
| 62 | var $maxframes = 0; // frame content depth maximum. 0 = disallow
|
---|
| 63 | var $expandlinks = true; // expand links to fully qualified URLs.
|
---|
| 64 | // this only applies to fetchlinks()
|
---|
| 65 | // submitlinks(), and submittext()
|
---|
| 66 | var $passcookies = true; // pass set cookies back through redirects
|
---|
| 67 | // NOTE: this currently does not respect
|
---|
| 68 | // dates, domains or paths.
|
---|
| 69 |
|
---|
| 70 | var $user = ""; // user for http authentication
|
---|
| 71 | var $pass = ""; // password for http authentication
|
---|
| 72 |
|
---|
| 73 | // http accept types
|
---|
| 74 | var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
|
---|
| 75 |
|
---|
| 76 | var $results = ""; // where the content is put
|
---|
| 77 |
|
---|
| 78 | var $error = ""; // error messages sent here
|
---|
| 79 | var $response_code = ""; // response code returned from server
|
---|
| 80 | var $headers = array(); // headers returned from server sent here
|
---|
| 81 | var $maxlength = 8192; // max return data length (body)
|
---|
| 82 | var $read_timeout = 0; // timeout on read operations, in seconds
|
---|
| 83 | // supported only since PHP 4 Beta 4
|
---|
| 84 | // set to 0 to disallow timeouts
|
---|
| 85 | var $timed_out = false; // if a read operation timed out
|
---|
| 86 | var $status = 0; // http request status
|
---|
| 87 |
|
---|
| 88 | var $temp_dir = "/tmp"; // temporary directory that the webserver
|
---|
| 89 | // has permission to write to.
|
---|
| 90 | // under Windows, this should be C:\temp
|
---|
| 91 |
|
---|
| 92 | var $curl_path = "/usr/local/bin/curl";
|
---|
| 93 | // Snoopy will use cURL for fetching
|
---|
| 94 | // SSL content if a full system path to
|
---|
| 95 | // the cURL binary is supplied here.
|
---|
| 96 | // set to false if you do not have
|
---|
| 97 | // cURL installed. See http://curl.haxx.se
|
---|
| 98 | // for details on installing cURL.
|
---|
| 99 | // Snoopy does *not* use the cURL
|
---|
| 100 | // library functions built into php,
|
---|
| 101 | // as these functions are not stable
|
---|
| 102 | // as of this Snoopy release.
|
---|
| 103 |
|
---|
| 104 | /**** Private variables ****/
|
---|
| 105 |
|
---|
| 106 | var $_maxlinelen = 4096; // max line length (headers)
|
---|
| 107 |
|
---|
| 108 | var $_httpmethod = "GET"; // default http request method
|
---|
| 109 | var $_httpversion = "HTTP/1.0"; // default http request version
|
---|
| 110 | var $_submit_method = "POST"; // default submit method
|
---|
| 111 | var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
|
---|
| 112 | var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
|
---|
| 113 | var $_redirectaddr = false; // will be set if page fetched is a redirect
|
---|
| 114 | var $_redirectdepth = 0; // increments on an http redirect
|
---|
| 115 | var $_frameurls = array(); // frame src urls
|
---|
| 116 | var $_framedepth = 0; // increments on frame depth
|
---|
| 117 |
|
---|
| 118 | var $_isproxy = false; // set if using a proxy server
|
---|
| 119 | var $_fp_timeout = 30; // timeout for socket connection
|
---|
| 120 |
|
---|
| 121 | /*======================================================================*\
|
---|
| 122 | Function: fetch
|
---|
| 123 | Purpose: fetch the contents of a web page
|
---|
| 124 | (and possibly other protocols in the
|
---|
| 125 | future like ftp, nntp, gopher, etc.)
|
---|
| 126 | Input: $URI the location of the page to fetch
|
---|
| 127 | Output: $this->results the output text from the fetch
|
---|
| 128 | \*======================================================================*/
|
---|
| 129 |
|
---|
/**
 * Fetch a page over http or https.
 *
 * Credentials embedded in $URI overwrite $this->user / $this->pass, and the
 * host and port parsed from $URI overwrite $this->host / $this->port.
 * After the request, a pending redirect ($this->_redirectaddr) is followed
 * while $this->_redirectdepth is below $this->maxredirs, and queued frame
 * URLs ($this->_frameurls) are fetched while $this->_framedepth is below
 * $this->maxframes.
 *
 * The page content itself is stored by the request helpers, which are not
 * part of this block (per the file's banner comment it ends up in
 * $this->results).
 *
 * @param string $URI absolute URI of the page to fetch
 * @return bool false when the scheme is neither http nor https, when https
 *              is requested but $this->curl_path is unset or not executable,
 *              or when the http connection cannot be opened; true otherwise.
 *              Results of nested redirect/frame fetches are not propagated.
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();       // parse_url() returns false on badly malformed input

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    $given_scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $scheme = strtolower($given_scheme);
    if ($scheme !== "http" && $scheme !== "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$given_scheme.'"'."\n";
        return false;
    }

    if ($scheme === "https")
    {
        // https is delegated to an external cURL binary; refuse early without one
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    // through a proxy the entire URI is sent, otherwise only path (+ query)
    $path = empty($URI_PARTS["path"]) ? '' : $URI_PARTS["path"];
    if (!empty($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];
    $target = $this->_isproxy ? $URI : $path;

    if ($scheme === "http")
    {
        // $fp is handed to _connect() uninitialised; presumably it is filled
        // in by reference (helper is defined outside this block)
        if (!$this->_connect($fp))
            return false;
        $this->_httprequest($target, $fp, $URI, $this->_httpmethod);
        $this->_disconnect($fp);
    }
    else
    {
        $this->_httpsrequest($target, $URI, $this->_httpmethod);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // only follow redirect if it's on this site, or offsiteok is true.
        // The host must be followed by ':', '/', '?', '#' or the end of the
        // address so that "host.evil.example" is not mistaken for "host".
        $onsite = preg_match(
            "~^https?://".preg_quote($this->host, "~").'(?=[:/?#]|$)~i',
            $this->_redirectaddr
        );
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // take a private copy: nested fetches may queue further frame urls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
|
---|
| 268 |
|
---|
| 269 | /*======================================================================*\
|
---|
| 270 | Function: submit
|
---|
| 271 | Purpose: submit an http form
|
---|
| 272 | Input: $URI the location to post the data
|
---|
| 273 | $formvars the formvars to use.
|
---|
| 274 | format: $formvars["var"] = "val";
|
---|
| 275 | $formfiles an array of files to submit
|
---|
| 276 | format: $formfiles["var"] = "/dir/filename.ext";
|
---|
| 277 | Output: $this->results the text output from the post
|
---|
| 278 | \*======================================================================*/
|
---|
| 279 |
|
---|
/**
 * Submit an http form over http or https.
 *
 * The request body is built by $this->_prepare_post_body() and sent with
 * $this->_submit_method / $this->_submit_type. Credentials, host and port
 * found in $URI overwrite the corresponding object properties. A pending
 * redirect is followed while $this->_redirectdepth is below
 * $this->maxredirs: with a GET (fetch) when the new address carries a query
 * string, otherwise by re-submitting the same form data. Queued frame URLs
 * are fetched while $this->_framedepth is below $this->maxframes.
 *
 * @param string $URI       the location to post the data to
 * @param mixed  $formvars  form variables, format: $formvars["var"] = "val"
 * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext"
 * @return bool false when the scheme is neither http nor https, when https
 *              is requested but $this->curl_path is unset or not executable,
 *              or when the http connection cannot be opened; true otherwise.
 */
function submit($URI, $formvars="", $formfiles="")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();       // parse_url() returns false on badly malformed input

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    $given_scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
    $scheme = strtolower($given_scheme);
    if ($scheme !== "http" && $scheme !== "https")
    {
        // not a valid protocol
        $this->error = 'Invalid protocol "'.$given_scheme.'"'."\n";
        return false;
    }

    if ($scheme === "https")
    {
        // https is delegated to an external cURL binary; refuse early without one
        if (!$this->curl_path)
            return false;
        if (function_exists("is_executable") && !is_executable($this->curl_path))
            return false;
    }

    $request_host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
    $this->host = $request_host;
    if (!empty($URI_PARTS["port"]))
        $this->port = $URI_PARTS["port"];

    // through a proxy the entire URI is sent, otherwise only path (+ query)
    $path = empty($URI_PARTS["path"]) ? '' : $URI_PARTS["path"];
    if (!empty($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];
    $target = $this->_isproxy ? $URI : $path;

    if ($scheme === "http")
    {
        // $fp is handed to _connect() uninitialised; presumably it is filled
        // in by reference (helper is defined outside this block)
        if (!$this->_connect($fp))
            return false;
        $this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
        $this->_disconnect($fp);
    }
    else
    {
        $this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // an address that is not already an absolute http(s) URL is resolved
        // against the host just contacted; absolute ones (including a switch
        // between http and https) are left untouched
        if (!preg_match("~^https?://~i", $this->_redirectaddr))
            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $given_scheme."://".$request_host);

        // only follow redirect if it's on this site, or offsiteok is true.
        // The host must be followed by ':', '/', '?', '#' or the end of the
        // address so that "host.evil.example" is not mistaken for "host".
        $onsite = preg_match(
            "~^https?://".preg_quote($this->host, "~").'(?=[:/?#]|$)~i',
            $this->_redirectaddr
        );
        if ($onsite || $this->offsiteok)
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            if (strpos($this->_redirectaddr, "?") > 0)
                $this->fetch($this->_redirectaddr);     // the redirect has changed the request method from post to get
            else
                $this->submit($this->_redirectaddr, $formvars, $formfiles);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // take a private copy: nested fetches may queue further frame urls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
|
---|
| 434 |
|
---|
| 435 | /*======================================================================*\
|
---|
| 436 | Function: fetchlinks
|
---|
| 437 | Purpose: fetch the links from a web page
|
---|
| 438 | Input: $URI where you are fetching from
|
---|
| 439 | Output: $this->results an array of the URLs
|
---|
| 440 | \*======================================================================*/
|
---|
| 441 |
|
---|
/**
 * Fetch a page and replace $this->results with the links found in it.
 *
 * When $this->results is an array, every element is processed on its own
 * (strip, then expand), the same way submitlinks() does it; handing the
 * whole array of link lists to _expandlinks() at once would make
 * preg_replace() operate on nested arrays.
 *
 * @param string $URI where you are fetching from
 * @return bool false when fetch() fails, true otherwise
 */
function fetchlinks($URI)
{
    if (!$this->fetch($URI))
        return false;

    // relative links are resolved against the last redirect target, if any
    if ($this->lastredirectaddr)
        $URI = $this->lastredirectaddr;

    if (is_array($this->results))
    {
        for ($x = 0; $x < count($this->results); $x++)
        {
            $this->results[$x] = $this->_striplinks($this->results[$x]);
            if ($this->expandlinks)
                $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
        }
    }
    else
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);
    }

    return true;
}
|
---|
| 463 |
|
---|
| 464 | /*======================================================================*\
|
---|
| 465 | Function: fetchform
|
---|
| 466 | Purpose: fetch the form elements from a web page
|
---|
| 467 | Input: $URI where you are fetching from
|
---|
| 468 | Output: $this->results the resulting html form
|
---|
| 469 | \*======================================================================*/
|
---|
| 470 |
|
---|
/**
 * Fetch a page and reduce $this->results to its form markup.
 *
 * @param string $URI where you are fetching from
 * @return bool false when fetch() fails, true otherwise
 */
function fetchform($URI)
{
    // nothing to strip when the page could not be retrieved
    if (!$this->fetch($URI))
        return false;

    // single document: strip it in place
    if (!is_array($this->results))
    {
        $this->results = $this->_stripform($this->results);
        return true;
    }

    // several documents: strip each one in turn
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_stripform($this->results[$idx]);
        $idx++;
    }

    return true;
}
|
---|
| 490 |
|
---|
| 491 |
|
---|
| 492 | /*======================================================================*\
|
---|
| 493 | Function: fetchtext
|
---|
| 494 | Purpose: fetch the text from a web page, stripping the links
|
---|
| 495 | Input: $URI where you are fetching from
|
---|
| 496 | Output: $this->results the text from the web page
|
---|
| 497 | \*======================================================================*/
|
---|
| 498 |
|
---|
/**
 * Fetch a page and reduce $this->results to its plain text.
 *
 * @param string $URI where you are fetching from
 * @return bool false when fetch() fails, true otherwise
 */
function fetchtext($URI)
{
    // nothing to strip when the page could not be retrieved
    if (!$this->fetch($URI))
        return false;

    // single document: strip it in place
    if (!is_array($this->results))
    {
        $this->results = $this->_striptext($this->results);
        return true;
    }

    // several documents: strip each one in turn
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        $idx++;
    }

    return true;
}
|
---|
| 515 |
|
---|
| 516 | /*======================================================================*\
|
---|
| 517 | Function: submitlinks
|
---|
| 518 | Purpose: grab links from a form submission
|
---|
| 519 | Input: $URI where you are submitting from
|
---|
| 520 | Output: $this->results an array of the links from the post
|
---|
| 521 | \*======================================================================*/
|
---|
| 522 |
|
---|
/**
 * Submit a form and replace $this->results with the links of the response.
 *
 * @param string $URI       where you are submitting to
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool false when submit() fails, true otherwise
 */
function submitlinks($URI, $formvars="", $formfiles="")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // links are resolved against the last redirect target when there is one
    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    // single document
    if (!is_array($this->results))
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $base);
        return true;
    }

    // several documents: handle each one separately
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striplinks($this->results[$idx]);
        if ($this->expandlinks)
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $base);
        $idx++;
    }

    return true;
}
|
---|
| 549 |
|
---|
| 550 | /*======================================================================*\
|
---|
| 551 | Function: submittext
|
---|
| 552 | Purpose: grab text from a form submission
|
---|
| 553 | Input: $URI where you are submitting from
|
---|
| 554 | Output: $this->results the text from the web page
|
---|
| 555 | \*======================================================================*/
|
---|
| 556 |
|
---|
/**
 * Submit a form and replace $this->results with the text of the response.
 *
 * @param string $URI       where you are submitting to
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool false when submit() fails, true otherwise
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // the last redirect target, when there is one, serves as the base address
    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    // single document
    if (!is_array($this->results))
    {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $base);
        return true;
    }

    // several documents: handle each one separately
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        if ($this->expandlinks)
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $base);
        $idx++;
    }

    return true;
}
|
---|
| 583 |
|
---|
| 584 |
|
---|
| 585 |
|
---|
| 586 | /*======================================================================*\
|
---|
| 587 | Function: set_submit_multipart
|
---|
| 588 | Purpose: Set the form submission content type to
|
---|
| 589 | multipart/form-data
|
---|
| 590 | \*======================================================================*/
|
---|
/**
 * Switch the form submission content type to multipart/form-data.
 */
function set_submit_multipart()
{
    $content_type = "multipart/form-data";
    $this->_submit_type = $content_type;
}
|
---|
| 595 |
|
---|
| 596 |
|
---|
| 597 | /*======================================================================*\
|
---|
| 598 | Function: set_submit_normal
|
---|
| 599 | Purpose: Set the form submission content type to
|
---|
| 600 | application/x-www-form-urlencoded
|
---|
| 601 | \*======================================================================*/
|
---|
/**
 * Switch the form submission content type back to
 * application/x-www-form-urlencoded.
 */
function set_submit_normal()
{
    $content_type = "application/x-www-form-urlencoded";
    $this->_submit_type = $content_type;
}
|
---|
| 606 |
|
---|
| 607 |
|
---|
| 608 |
|
---|
| 609 |
|
---|
| 610 | /*======================================================================*\
|
---|
| 611 | Private functions
|
---|
| 612 | \*======================================================================*/
|
---|
| 613 |
|
---|
| 614 |
|
---|
| 615 | /*======================================================================*\
|
---|
| 616 | Function: _striplinks
|
---|
| 617 | Purpose: strip the hyperlinks from an html document
|
---|
| 618 | Input: $document document to strip.
|
---|
| 619 | Output: $match an array of the links
|
---|
| 620 | \*======================================================================*/
|
---|
| 621 |
|
---|
/**
 * Extract the href targets of all anchors in an html document.
 *
 * Quoted targets (capture group 2) are listed first, unquoted targets
 * (capture group 3) after them; within each group the document order is
 * kept.
 *
 * @param string $document document to strip
 * @return array list of link targets; an empty array when none is found
 */
function _striplinks($document)
{
    preg_match_all("'<\s*a\s.*?href\s*=\s*      # find <a href=
                    ([\"\'])?                   # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                # quote, otherwise match up to next space
                    'isx",$document,$links);

    // start from an empty list so that "no links" yields array(), not null
    $match = array();

    // catenate the non-empty matches from the conditional subpattern
    foreach (array(2, 3) as $group)
    {
        if (empty($links[$group]))
            continue;

        foreach ($links[$group] as $val)
        {
            // compare against '' rather than using empty(): "0" is a valid link
            if ($val !== '')
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
|
---|
| 648 |
|
---|
| 649 | /*======================================================================*\
|
---|
| 650 | Function: _stripform
|
---|
| 651 | Purpose: strip the form elements from an html document
|
---|
| 652 | Input: $document document to strip.
|
---|
| 653 | Output: $match the matched form tags, joined into one string with "\r\n"
|
---|
| 654 | \*======================================================================*/
|
---|
| 655 |
|
---|
/**
 * Reduce an html document to its form markup.
 *
 * Collects every FORM, INPUT, SELECT, TEXTAREA and OPTION tag (for OPTION
 * also the text up to the next option/select tag).
 *
 * @param string $document document to strip
 * @return string the matched fragments joined with "\r\n"
 */
function _stripform($document)
{
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($pattern, $document, $found);

    // index 0 holds the complete matches; glue them together
    return implode("\r\n", $found[0]);
}
|
---|
| 666 |
|
---|
| 667 |
|
---|
| 668 |
|
---|
| 669 | /*======================================================================*\
|
---|
| 670 | Function: _striptext
|
---|
| 671 | Purpose: strip the text from an html document
|
---|
| 672 | Input: $document document to strip.
|
---|
| 673 | Output: $text the resulting text
|
---|
| 674 | \*======================================================================*/
|
---|
| 675 |
|
---|
/**
 * Reduce an html document to plain text.
 *
 * Removes script blocks and tags, collapses whitespace that follows a line
 * break, and decodes a fixed list of common entities. Decoded non-ASCII
 * characters are emitted as single bytes via chr(), like the other entries
 * of the table (Latin-1 codes; 128 for the euro sign).
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
function _striptext($document)
{
    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.
    //
    // Pattern => replacement pairs keep each entity next to its value, so
    // the two lists cannot drift out of step. Order matters: the patterns
    // are applied top to bottom.
    $table = array(
        "'<script[^>]*?>.*?</script>'si"   => "",       // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"            => "",       // strip out html tags
        "'([\r\n])[\s]+'"                  => "\\1",    // strip out white space
        "'&(quot|#34|#034|#x22);'i"        => "\"",     // replace html entities
        "'&(amp|#38|#038|#x26);'i"         => "&",      // added hexadecimal values
        "'&(lt|#60|#060|#x3c);'i"          => "<",
        "'&(gt|#62|#062|#x3e);'i"          => ">",
        "'&(nbsp|#160|#xa0);'i"            => " ",
        "'&(iexcl|#161);'i"                => chr(161),
        "'&(cent|#162);'i"                 => chr(162),
        "'&(pound|#163);'i"                => chr(163),
        "'&(copy|#169);'i"                 => chr(169),
        "'&(reg|#174);'i"                  => chr(174),
        "'&(deg|#176);'i"                  => chr(176),
        "'&(#39|#039|#x27);'"              => chr(39),
        "'&(euro|#8364);'i"                => chr(128), // europe
        "'&a(uml|UML);'"                   => chr(228), // german: a-umlaut
        "'&o(uml|UML);'"                   => chr(246), // o-umlaut
        "'&u(uml|UML);'"                   => chr(252), // u-umlaut
        "'&A(uml|UML);'"                   => chr(196), // A-umlaut
        "'&O(uml|UML);'"                   => chr(214), // O-umlaut
        "'&U(uml|UML);'"                   => chr(220), // U-umlaut
        "'&szlig;'i"                       => chr(223), // sharp s
    );

    $text = preg_replace(array_keys($table), array_values($table), $document);

    return $text;
}
|
---|
| 736 |
|
---|
| 737 | /*======================================================================*\
|
---|
| 738 | Function: _expandlinks
|
---|
| 739 | Purpose: expand each link into a fully qualified URL
|
---|
| 740 | Input: $links the links to qualify
|
---|
| 741 | $URI the full URI to get the base from
|
---|
| 742 | Output: $expandedLinks the expanded links
|
---|
| 743 | \*======================================================================*/
|
---|
| 744 |
|
---|
/**
 * Expand each link into a fully qualified URL.
 *
 * The base is $URI without its query string; a trailing "name.ext" path
 * segment and a trailing slash are removed from it. Links starting with
 * "/" are resolved against scheme://host of the base, other links that do
 * not start with http://, https:// or mailto: are resolved against the
 * base directory. "/./" and "/dir/../" sequences are then collapsed (one
 * pass only).
 *
 * @param mixed  $links the link (string) or links (array of strings) to qualify
 * @param string $URI   the full URI to get the base from
 * @return mixed the expanded link(s); $links is returned unchanged when no
 *               scheme and host can be derived from $URI
 */
function _expandlinks($links,$URI)
{
    // drop the query string; without any base there is nothing to expand against
    if (!preg_match("/^[^\?]+/", $URI, $match))
        return $links;
    $match = $match[0];

    $match_part = parse_url($match);
    if (!is_array($match_part) || empty($match_part["scheme"]) || empty($match_part["host"]))
        return $links;

    // Strip a trailing "name.ext" segment only when the base really has a
    // path; otherwise a bare two-label host ("http://example.com") would
    // lose its host name to this pattern.
    if (!empty($match_part["path"]))
        $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match);
    $match = preg_replace("|/$|","",$match);

    $match_root = $match_part["scheme"]."://".$match_part["host"];

    $search = array(    "|^http://".preg_quote($this->host)."|i",   // own host: reduce to a path
                        "|^(\/)|i",                                 // root-relative link
                        "|^(?!http://)(?!https://)(?!mailto:)|i",   // any other non-absolute link
                        "|/\./|",
                        "|/[^\/]+/\.\./|"
                    );

    $replace = array(   "",
                        $match_root."/",
                        $match."/",
                        "/",
                        "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
|
---|
| 774 |
|
---|
| 775 | /*======================================================================*\
|
---|
| 776 | Function: _httprequest
|
---|
| 777 | Purpose: go get the http data from the server
|
---|
| 778 | Input: $url the url to fetch
|
---|
| 779 | $fp the current open file pointer
|
---|
| 780 | $URI the full URI
|
---|
| 781 | $body body contents to send if any (POST)
|
---|
| 782 | Output: true on success, false if the read timed out (status is set to -100)
|
---|
| 783 | \*======================================================================*/
|
---|
| 784 |
|
---|
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    // Send one HTTP request over the open socket $fp and read the response
    // into $this->headers, $this->status, $this->response_code and
    // $this->results.
    //   $url           request target for the request line ("/" when empty)
    //   $fp            open socket stream
    //   $URI           full URI being fetched; base for redirects and frames
    //   $http_method   request method placed on the request line
    //   $content_type  value of the Content-type header, if any
    //   $body          request body, if any
    // Returns true on success, false when a read timed out (status -100).

    $cookie_headers = '';
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    // parse_url() may fail or omit the scheme; fall back to plain http
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "http";
    if(empty($url))
        $url = "/";

    // ---- build the request head ----
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if(!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_headers .= 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(): each() no longer exists in PHP 8, and
        // without a reset() it skipped every raw header on follow-up requests
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    // blank line ends the request head
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    $request = $headers.$body;
    fwrite($fp,$request,strlen($request));

    $this->_redirectaddr = false;
    unset($this->headers);

    // ---- read the response head, one line at a time ----
    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // an empty line (CRLF, or a bare LF from sloppy servers) ends the head
        if(rtrim($currentHeader,"\r\n") === "")
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // Whitespace after the colon is optional; an empty target is ignored.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches) && $matches[2] != "")
        {
            $target = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        // status line: keep the numeric code and the raw line
        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // ---- read the body until the peer stops sending ----
    $results = '';
    while(true)
    {
        $_data = fread($fp, $this->maxlength);
        if (strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        foreach($match[1] as $frame_src)
            $this->_frameurls[] = $this->_expandlinks($frame_src,$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
| 938 |
|
---|
| 939 | /*======================================================================*\
|
---|
| 940 | Function: _httpsrequest
|
---|
| 941 | Purpose: go get the https data from the server using curl
|
---|
| 942 | Input: $url the url to fetch
|
---|
| 943 | $URI the full URI
|
---|
| 944 | $body body contents to send if any (POST)
|
---|
| 945 | Output: true on success, false if cURL exited with an error
|
---|
| 946 | \*======================================================================*/
|
---|
| 947 |
|
---|
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // Fetch $URI over https by running the external curl binary
    // ($this->curl_path) and read the response into $this->headers,
    // $this->status, $this->response_code and $this->results.
    //   $url           request target (not needed by curl, kept for symmetry
    //                  with _httprequest)
    //   $URI           full URI to fetch
    //   $http_method   request method (curl switches to POST when a body is
    //                  supplied with -d)
    //   $content_type  value of the Content-type header, if any
    //   $body          request body, if any
    // Returns true on success, false when the temp file cannot be created or
    // curl exits with an error ($this->error is set).

    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    // parse_url() may fail or omit the scheme; this method only serves https
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "https";
    if(empty($url))
        $url = "/";
    // GET ... header not needed for curl
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
    {
        if(!empty($this->port))
            $headers[] = "Host: ".$this->host.":".$this->port;
        else
            $headers[] = "Host: ".$this->host;
    }
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(): each() no longer exists in PHP 8
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Build the curl command line. Every value that can carry caller or
    // server supplied text goes through escapeshellarg(), so it reaches curl
    // as exactly one argument and cannot break out into the shell.
    $cmdline_params = "";
    foreach($headers as $header_line)
        $cmdline_params .= " -H ".escapeshellarg($header_line);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

    // curl dumps the response headers into a temp file; prefer the temp_dir
    // property if the class defines a non-empty one, else the system default
    $tmp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
    $headerfile = tempnam($tmp_dir, "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    exec(escapeshellcmd($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        // do not leave the temp file behind on failure
        unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    $results = implode("\r\n",$results);

    // read the header dump, then remove it straight away
    $result_headers = file($headerfile);
    unlink($headerfile);
    if(!is_array($result_headers))
        $result_headers = array();

    $this->_redirectaddr = false;
    unset($this->headers);

    foreach($result_headers as $header_line)
    {
        // if a header begins with Location: or URI:, set the redirect.
        // Same pattern as _httprequest: case-insensitive, whitespace after
        // the colon optional; an empty target is ignored.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($header_line),$matches) && $matches[2] != "")
        {
            $target = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        // status line: keep the numeric code (as _httprequest does) and the raw line
        if(preg_match("|^HTTP/|",$header_line))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$header_line,$status))
                $this->status = $status[1];
            $this->response_code = $header_line;
        }

        $this->headers[] = $header_line;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        foreach($match[1] as $frame_src)
            $this->_frameurls[] = $this->_expandlinks($frame_src,$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
| 1087 |
|
---|
| 1088 | /*======================================================================*\
|
---|
| 1089 | Function: setcookies()
|
---|
| 1090 | Purpose: set cookies for a redirection
|
---|
| 1091 | \*======================================================================*/
|
---|
| 1092 |
|
---|
function setcookies()
{
    // Copy every "Set-Cookie: name=value" pair found in $this->headers into
    // $this->cookies (value url-decoded), so it can be sent on the next
    // request. Cookie attributes after the first ";" are ignored.

    // nothing to do when no response headers have been collected
    if(empty($this->headers) || !is_array($this->headers))
        return;

    foreach($this->headers as $header_line)
    {
        // [^;\r\n] stops at the line terminator too: header lines are stored
        // with their trailing CRLF, which otherwise ended up inside the value
        // of any cookie that has no attributes
        if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header_line, $match))
            $this->cookies[$match[1]] = urldecode($match[2]);
    }
}
---|
| 1102 |
|
---|
| 1103 | /*======================================================================*\
|
---|
| 1104 | Function: _check_timeout
|
---|
| 1105 | Purpose: checks whether timeout has occurred
|
---|
| 1106 | Input: $fp file pointer
|
---|
| 1107 | \*======================================================================*/
|
---|
| 1108 |
|
---|
function _check_timeout($fp)
{
    // Report whether the stream $fp has timed out. Only checked when a read
    // timeout is configured; on a timeout $this->timed_out is set as well.

    if (!($this->read_timeout > 0))
        return false;

    $stream_state = socket_get_status($fp);
    if (!$stream_state["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
| 1120 |
|
---|
| 1121 | /*======================================================================*\
|
---|
| 1122 | Function: _connect
|
---|
| 1123 | Purpose: make a socket connection
|
---|
| 1124 | Input: $fp file pointer
|
---|
| 1125 | \*======================================================================*/
|
---|
| 1126 |
|
---|
function _connect(&$fp)
{
    // Open a socket to the target host, or to the proxy when both
    // proxy_host and proxy_port are set.
    //   $fp  receives the socket (by reference); false on failure
    // Returns true on success. On failure returns false, with $this->status
    // set to the fsockopen() error number and $this->error to a message.

    if(!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch($errno)
    {
        // each case needs its break: without it every branch fell through
        // and the message was always overwritten by the default one
        case -3:
            $this->error="socket creation failed (-3)";
            break;
        case -4:
            $this->error="dns lookup failure (-4)";
            break;
        case -5:
            $this->error="connection refused or timed out (-5)";
            break;
        default:
            $this->error="connection failed (".$errno.")";
    }
    return false;
}
| 1174 | /*======================================================================*\
|
---|
| 1175 | Function: _disconnect
|
---|
| 1176 | Purpose: disconnect a socket connection
|
---|
| 1177 | Input: $fp file pointer
|
---|
| 1178 | \*======================================================================*/
|
---|
| 1179 |
|
---|
function _disconnect($fp)
{
    // Close the socket $fp and hand back fclose()'s result.
    $closed = fclose($fp);

    return $closed;
}
| 1184 |
|
---|
| 1185 |
|
---|
| 1186 | /*======================================================================*\
|
---|
| 1187 | Function: _prepare_post_body
|
---|
| 1188 | Purpose: Prepare post body according to encoding type
|
---|
| 1189 | Input: $formvars - form variables
|
---|
| 1190 | $formfiles - form upload files
|
---|
| 1191 | Output: post body
|
---|
| 1192 | \*======================================================================*/
|
---|
| 1193 |
|
---|
function _prepare_post_body($formvars, $formfiles)
{
    // Build the request body for a form submission, encoded according to
    // $this->_submit_type.
    //   $formvars   field name => value, or => list of values
    //   $formfiles  field name => file name, or => list of file names
    //               (only used for multipart/form-data)
    // Returns the body as a string, an empty string for an unknown submit
    // type, or nothing (null) when there are neither variables nor files.
    // For multipart/form-data a fresh $this->_mime_boundary is generated.

    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    // foreach is used throughout: each() no longer exists in PHP 8
    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    // multi-valued field: name[]=v1&name[]=v2&...
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // plain "[]" suffix; the former "\[\]" inside a
                        // double-quoted string put literal backslashes into
                        // the field name
                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // Read each file into a fresh variable. The old fread()
                    // loop kept appending to one buffer, so every file also
                    // carried the content of all files before it, and it
                    // could not cope with empty files.
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            // closing boundary
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
| 1258 | }
|
---|
| 1259 | endif;
|
---|
| 1260 |
|
---|
| 1261 | ?>
|
---|