[44] | 1 | <?php
|
---|
| 2 | if ( !in_array('Snoopy', get_declared_classes() ) ) :
|
---|
| 3 | /*************************************************
|
---|
| 4 |
|
---|
| 5 | Snoopy - the PHP net client
|
---|
| 6 | Author: Monte Ohrt <monte@ispi.net>
|
---|
| 7 | Copyright (c): 1999-2008 New Digital Group, all rights reserved
|
---|
| 8 | Version: 1.2.4
|
---|
| 9 |
|
---|
| 10 | * This library is free software; you can redistribute it and/or
|
---|
| 11 | * modify it under the terms of the GNU Lesser General Public
|
---|
| 12 | * License as published by the Free Software Foundation; either
|
---|
| 13 | * version 2.1 of the License, or (at your option) any later version.
|
---|
| 14 | *
|
---|
| 15 | * This library is distributed in the hope that it will be useful,
|
---|
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
| 18 | * Lesser General Public License for more details.
|
---|
| 19 | *
|
---|
| 20 | * You should have received a copy of the GNU Lesser General Public
|
---|
| 21 | * License along with this library; if not, write to the Free Software
|
---|
| 22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
---|
| 23 |
|
---|
| 24 | You may contact the author of Snoopy by e-mail at:
|
---|
| 25 | monte@ohrt.com
|
---|
| 26 |
|
---|
| 27 | The latest version of Snoopy can be obtained from:
|
---|
| 28 | http://snoopy.sourceforge.net/
|
---|
| 29 |
|
---|
| 30 | *************************************************/
|
---|
| 31 |
|
---|
| 32 | class Snoopy
|
---|
| 33 | {
|
---|
| 34 | /**** Public variables ****/
|
---|
| 35 |
|
---|
| 36 | /* user definable vars */
|
---|
| 37 |
|
---|
| 38 | var $host = "www.php.net"; // host name we are connecting to
|
---|
| 39 | var $port = 80; // port we are connecting to
|
---|
| 40 | var $proxy_host = ""; // proxy host to use
|
---|
| 41 | var $proxy_port = ""; // proxy port to use
|
---|
| 42 | var $proxy_user = ""; // proxy user to use
|
---|
| 43 | var $proxy_pass = ""; // proxy password to use
|
---|
| 44 |
|
---|
| 45 | var $agent = "Snoopy v1.2.4"; // agent we masquerade as
|
---|
| 46 | var $referer = ""; // referer info to pass
|
---|
| 47 | var $cookies = array(); // array of cookies to pass
|
---|
| 48 | // $cookies["username"]="joe";
|
---|
| 49 | var $rawheaders = array(); // array of raw headers to send
|
---|
| 50 | // $rawheaders["Content-type"]="text/html";
|
---|
| 51 |
|
---|
| 52 | var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
|
---|
| 53 | var $lastredirectaddr = ""; // contains address of last redirected address
|
---|
| 54 | var $offsiteok = true; // allows redirection off-site
|
---|
| 55 | var $maxframes = 0; // frame content depth maximum. 0 = disallow
|
---|
| 56 | var $expandlinks = true; // expand links to fully qualified URLs.
|
---|
| 57 | // this only applies to fetchlinks()
|
---|
| 58 | // submitlinks(), and submittext()
|
---|
| 59 | var $passcookies = true; // pass set cookies back through redirects
|
---|
| 60 | // NOTE: this currently does not respect
|
---|
| 61 | // dates, domains or paths.
|
---|
| 62 |
|
---|
| 63 | var $user = ""; // user for http authentication
|
---|
| 64 | var $pass = ""; // password for http authentication
|
---|
| 65 |
|
---|
| 66 | // http accept types
|
---|
| 67 | var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
|
---|
| 68 |
|
---|
| 69 | var $results = ""; // where the content is put
|
---|
| 70 |
|
---|
| 71 | var $error = ""; // error messages sent here
|
---|
| 72 | var $response_code = ""; // response code returned from server
|
---|
| 73 | var $headers = array(); // headers returned from server sent here
|
---|
| 74 | var $maxlength = 500000; // max return data length (body)
|
---|
| 75 | var $read_timeout = 0; // timeout on read operations, in seconds
|
---|
| 76 | // supported only since PHP 4 Beta 4
|
---|
| 77 | // set to 0 to disallow timeouts
|
---|
| 78 | var $timed_out = false; // if a read operation timed out
|
---|
| 79 | var $status = 0; // http request status
|
---|
| 80 |
|
---|
| 81 | var $temp_dir = "/tmp"; // temporary directory that the webserver
|
---|
| 82 | // has permission to write to.
|
---|
| 83 | // under Windows, this should be C:\temp
|
---|
| 84 |
|
---|
| 85 | var $curl_path = "/usr/local/bin/curl";
|
---|
| 86 | // Snoopy will use cURL for fetching
|
---|
| 87 | // SSL content if a full system path to
|
---|
| 88 | // the cURL binary is supplied here.
|
---|
| 89 | // set to false if you do not have
|
---|
| 90 | // cURL installed. See http://curl.haxx.se
|
---|
| 91 | // for details on installing cURL.
|
---|
| 92 | // Snoopy does *not* use the cURL
|
---|
| 93 | // library functions built into php,
|
---|
| 94 | // as these functions are not stable
|
---|
| 95 | // as of this Snoopy release.
|
---|
| 96 |
|
---|
| 97 | /**** Private variables ****/
|
---|
| 98 |
|
---|
| 99 | var $_maxlinelen = 4096; // max line length (headers)
|
---|
| 100 |
|
---|
| 101 | var $_httpmethod = "GET"; // default http request method
|
---|
| 102 | var $_httpversion = "HTTP/1.0"; // default http request version
|
---|
| 103 | var $_submit_method = "POST"; // default submit method
|
---|
| 104 | var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
|
---|
| 105 | var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
|
---|
| 106 | var $_redirectaddr = false; // will be set if page fetched is a redirect
|
---|
| 107 | var $_redirectdepth = 0; // increments on an http redirect
|
---|
| 108 | var $_frameurls = array(); // frame src urls
|
---|
| 109 | var $_framedepth = 0; // increments on frame depth
|
---|
| 110 |
|
---|
| 111 | var $_isproxy = false; // set if using a proxy server
|
---|
| 112 | var $_fp_timeout = 30; // timeout for socket connection
|
---|
| 113 |
|
---|
| 114 | /*======================================================================*\
|
---|
| 115 | Function: fetch
|
---|
| 116 | Purpose: fetch the contents of a web page
|
---|
| 117 | (and possibly other protocols in the
|
---|
| 118 | future like ftp, nntp, gopher, etc.)
|
---|
| 119 | Input: $URI the location of the page to fetch
|
---|
| 120 | Output: $this->results the output text from the fetch
|
---|
| 121 | \*======================================================================*/
|
---|
| 122 |
|
---|
/**
 * Fetch the contents of a web page over http or https.
 *
 * Credentials embedded in the URI overwrite $this->user / $this->pass.
 * After the request, a pending redirect ($this->_redirectaddr) is followed
 * while $this->_redirectdepth is below $this->maxredirs, and any collected
 * frame URLs ($this->_frameurls) are fetched while $this->_framedepth is
 * below $this->maxframes.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false when the scheme is neither http nor https, when the
 *              http connection could not be opened, or when https is
 *              requested without a usable cURL binary; true otherwise
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; normalise to
    // an empty array so the lookups below stay well-defined.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    // A URI without a scheme ends up in the "invalid protocol" branch.
    if (empty($URI_PARTS["scheme"]))
        $URI_PARTS["scheme"] = '';

    switch(strtolower($URI_PARTS["scheme"]))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if(!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if(!$this->_connect($fp))
                return false;

            if($this->_isproxy)
            {
                // using proxy, send entire URI
                $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
            }
            else
            {
                $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                // no proxy, send only the path
                $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
            }

            $this->_disconnect($fp);

            /* url was redirected, check if we've hit the max depth */
            if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
            {
                // only follow redirect if it's on this site, or offsiteok is true
                if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
                {
                    /* follow the redirect */
                    $this->_redirectdepth++;
                    $this->lastredirectaddr=$this->_redirectaddr;
                    $this->fetch($this->_redirectaddr);
                }
            }

            if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
            {
                // Work on a copy: the nested fetch() calls refill _frameurls.
                $frameurls = $this->_frameurls;
                $this->_frameurls = array();

                // foreach instead of each(), which no longer exists in PHP 8
                foreach($frameurls as $frameurl)
                {
                    if($this->_framedepth >= $this->maxframes)
                        break;
                    $this->fetch($frameurl);
                    $this->_framedepth++;
                }
            }
            return true;

        case "https":
            // https is delegated to an external cURL binary
            if(!$this->curl_path)
                return false;
            if(function_exists("is_executable"))
                if (!is_executable($this->curl_path))
                    return false;
            $this->host = $URI_PARTS["host"];
            if(!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if($this->_isproxy)
            {
                // using proxy, send entire URI
                $this->_httpsrequest($URI,$URI,$this->_httpmethod);
            }
            else
            {
                $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                // no proxy, send only the path
                $this->_httpsrequest($path, $URI, $this->_httpmethod);
            }

            /* url was redirected, check if we've hit the max depth */
            if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
            {
                // only follow redirect if it's on this site, or offsiteok is true
                if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
                {
                    /* follow the redirect */
                    $this->_redirectdepth++;
                    $this->lastredirectaddr=$this->_redirectaddr;
                    $this->fetch($this->_redirectaddr);
                }
            }

            if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
            {
                // Work on a copy: the nested fetch() calls refill _frameurls.
                $frameurls = $this->_frameurls;
                $this->_frameurls = array();

                foreach($frameurls as $frameurl)
                {
                    if($this->_framedepth >= $this->maxframes)
                        break;
                    $this->fetch($frameurl);
                    $this->_framedepth++;
                }
            }
            return true;

        default:
            // not a valid protocol; double quotes so "\n" is a real newline
            $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
            return false;
    }
}
|
---|
| 261 |
|
---|
| 262 | /*======================================================================*\
|
---|
| 263 | Function: submit
|
---|
| 264 | Purpose: submit an http form
|
---|
| 265 | Input: $URI the location to post the data
|
---|
| 266 | $formvars the formvars to use.
|
---|
| 267 | format: $formvars["var"] = "val";
|
---|
| 268 | $formfiles an array of files to submit
|
---|
| 269 | format: $formfiles["var"] = "/dir/filename.ext";
|
---|
| 270 | Output: $this->results the text output from the post
|
---|
| 271 | \*======================================================================*/
|
---|
| 272 |
|
---|
/**
 * Submit an http form.
 *
 * The request body is built by _prepare_post_body() and sent with
 * $this->_submit_method / $this->_submit_type. A redirect answer is
 * followed while $this->_redirectdepth is below $this->maxredirs: redirect
 * targets containing a "?" are re-requested through fetch(), all others are
 * re-submitted with the same form data. Collected frame URLs are fetched
 * while $this->_framedepth is below $this->maxframes.
 *
 * @param string $URI       the location to post the data
 * @param mixed  $formvars  the form variables, format: $formvars["var"] = "val";
 * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext";
 * @return bool false when the scheme is neither http nor https, when the
 *              http connection could not be opened, or when https is
 *              requested without a usable cURL binary; true otherwise
 */
function submit($URI, $formvars="", $formfiles="")
{
    $postdata = $this->_prepare_post_body($formvars, $formfiles);

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; normalise to
    // an empty array so the lookups below stay well-defined.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];
    if (empty($URI_PARTS["query"]))
        $URI_PARTS["query"] = '';
    if (empty($URI_PARTS["path"]))
        $URI_PARTS["path"] = '';
    // A URI without a scheme ends up in the "invalid protocol" branch.
    if (empty($URI_PARTS["scheme"]))
        $URI_PARTS["scheme"] = '';

    switch(strtolower($URI_PARTS["scheme"]))
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if(!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if(!$this->_connect($fp))
                return false;

            if($this->_isproxy)
            {
                // using proxy, send entire URI
                $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
            }
            else
            {
                $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                // no proxy, send only the path
                $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            }

            $this->_disconnect($fp);

            /* url was redirected, check if we've hit the max depth */
            if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
            {
                // qualify a redirect target that does not start with our scheme
                if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
                    $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

                // only follow redirect if it's on this site, or offsiteok is true
                if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
                {
                    /* follow the redirect */
                    $this->_redirectdepth++;
                    $this->lastredirectaddr=$this->_redirectaddr;
                    if( strpos( $this->_redirectaddr, "?" ) > 0 )
                        $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                    else
                        $this->submit($this->_redirectaddr,$formvars, $formfiles);
                }
            }

            if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
            {
                // Work on a copy: the nested fetch() calls refill _frameurls.
                $frameurls = $this->_frameurls;
                $this->_frameurls = array();

                // foreach instead of each(), which no longer exists in PHP 8
                foreach($frameurls as $frameurl)
                {
                    if($this->_framedepth >= $this->maxframes)
                        break;
                    $this->fetch($frameurl);
                    $this->_framedepth++;
                }
            }
            return true;

        case "https":
            // https is delegated to an external cURL binary
            if(!$this->curl_path)
                return false;
            if(function_exists("is_executable"))
                if (!is_executable($this->curl_path))
                    return false;
            $this->host = $URI_PARTS["host"];
            if(!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if($this->_isproxy)
            {
                // using proxy, send entire URI
                $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            }
            else
            {
                $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                // no proxy, send only the path
                $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            }

            /* url was redirected, check if we've hit the max depth */
            if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
            {
                // qualify a redirect target that does not start with our scheme
                if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
                    $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

                // only follow redirect if it's on this site, or offsiteok is true
                if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
                {
                    /* follow the redirect */
                    $this->_redirectdepth++;
                    $this->lastredirectaddr=$this->_redirectaddr;
                    if( strpos( $this->_redirectaddr, "?" ) > 0 )
                        $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                    else
                        $this->submit($this->_redirectaddr,$formvars, $formfiles);
                }
            }

            if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
            {
                // Work on a copy: the nested fetch() calls refill _frameurls.
                $frameurls = $this->_frameurls;
                $this->_frameurls = array();

                foreach($frameurls as $frameurl)
                {
                    if($this->_framedepth >= $this->maxframes)
                        break;
                    $this->fetch($frameurl);
                    $this->_framedepth++;
                }
            }
            return true;

        default:
            // not a valid protocol; double quotes so "\n" is a real newline
            $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
            return false;
    }
}
|
---|
| 427 |
|
---|
| 428 | /*======================================================================*\
|
---|
| 429 | Function: fetchlinks
|
---|
| 430 | Purpose: fetch the links from a web page
|
---|
| 431 | Input: $URI where you are fetching from
|
---|
| 432 | Output: $this->results an array of the URLs
|
---|
| 433 | \*======================================================================*/
|
---|
| 434 |
|
---|
/**
 * Fetch a page and reduce $this->results to the hyperlinks it contains.
 *
 * When the fetch was redirected, the last redirect address is used as the
 * base for link expansion instead of the requested URI.
 *
 * @param string $URI where you are fetching from
 * @return bool false if fetch() failed, true otherwise
 */
function fetchlinks($URI)
{
    if (!$this->fetch($URI))
        return false;

    // links are relative to where we actually ended up
    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    if (!is_array($this->results))
    {
        $this->results = $this->_striplinks($this->results);
    }
    else
    {
        // one result per fetched document: strip each in place
        $idx = 0;
        while ($idx < count($this->results))
        {
            $this->results[$idx] = $this->_striplinks($this->results[$idx]);
            $idx++;
        }
    }

    if ($this->expandlinks)
        $this->results = $this->_expandlinks($this->results, $base);

    return true;
}
|
---|
| 456 |
|
---|
| 457 | /*======================================================================*\
|
---|
| 458 | Function: fetchform
|
---|
| 459 | Purpose: fetch the form elements from a web page
|
---|
| 460 | Input: $URI where you are fetching from
|
---|
| 461 | Output: $this->results the resulting html form
|
---|
| 462 | \*======================================================================*/
|
---|
| 463 |
|
---|
/**
 * Fetch a page and reduce $this->results to its form elements.
 *
 * @param string $URI where you are fetching from
 * @return bool false if fetch() failed, true otherwise
 */
function fetchform($URI)
{
    if (!$this->fetch($URI))
        return false;

    if (!is_array($this->results))
    {
        $this->results = $this->_stripform($this->results);
        return true;
    }

    // one result per fetched document: strip each in place
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_stripform($this->results[$idx]);
        $idx++;
    }
    return true;
}
|
---|
| 483 |
|
---|
| 484 |
|
---|
| 485 | /*======================================================================*\
|
---|
| 486 | Function: fetchtext
|
---|
| 487 | Purpose: fetch the text from a web page, stripping the links
|
---|
| 488 | Input: $URI where you are fetching from
|
---|
| 489 | Output: $this->results the text from the web page
|
---|
| 490 | \*======================================================================*/
|
---|
| 491 |
|
---|
/**
 * Fetch a page and reduce $this->results to its plain text.
 *
 * @param string $URI where you are fetching from
 * @return bool false if fetch() failed, true otherwise
 */
function fetchtext($URI)
{
    if (!$this->fetch($URI))
        return false;

    if (!is_array($this->results))
    {
        $this->results = $this->_striptext($this->results);
        return true;
    }

    // one result per fetched document: strip each in place
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        $idx++;
    }
    return true;
}
|
---|
| 508 |
|
---|
| 509 | /*======================================================================*\
|
---|
| 510 | Function: submitlinks
|
---|
| 511 | Purpose: grab links from a form submission
|
---|
| 512 | Input: $URI where you are submitting from
|
---|
| 513 | Output: $this->results an array of the links from the post
|
---|
| 514 | \*======================================================================*/
|
---|
| 515 |
|
---|
/**
 * Submit a form and reduce $this->results to the hyperlinks of the answer.
 *
 * When the submission was redirected, the last redirect address is used as
 * the base for link expansion instead of the requested URI.
 *
 * @param string $URI       where the form is submitted
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool false if submit() failed, true otherwise
 */
function submitlinks($URI, $formvars="", $formfiles="")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    // links are relative to where we actually ended up
    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    if (!is_array($this->results))
    {
        $this->results = $this->_striplinks($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $base);
        return true;
    }

    // one result per fetched document: strip and expand each in place
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striplinks($this->results[$idx]);
        if ($this->expandlinks)
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $base);
        $idx++;
    }
    return true;
}
|
---|
| 542 |
|
---|
| 543 | /*======================================================================*\
|
---|
| 544 | Function: submittext
|
---|
| 545 | Purpose: grab text from a form submission
|
---|
| 546 | Input: $URI where you are submitting from
|
---|
| 547 | Output: $this->results the text from the web page
|
---|
| 548 | \*======================================================================*/
|
---|
| 549 |
|
---|
/**
 * Submit a form and reduce $this->results to the plain text of the answer.
 *
 * The stripped text is additionally passed through _expandlinks() when
 * $this->expandlinks is set, using the last redirect address (if any) as
 * the base.
 *
 * @param string $URI       where the form is submitted
 * @param mixed  $formvars  form variables, passed through to submit()
 * @param mixed  $formfiles form files, passed through to submit()
 * @return bool false if submit() failed, true otherwise
 */
function submittext($URI, $formvars = "", $formfiles = "")
{
    if (!$this->submit($URI, $formvars, $formfiles))
        return false;

    $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

    if (!is_array($this->results))
    {
        $this->results = $this->_striptext($this->results);
        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $base);
        return true;
    }

    // one result per fetched document: strip and expand each in place
    $idx = 0;
    while ($idx < count($this->results))
    {
        $this->results[$idx] = $this->_striptext($this->results[$idx]);
        if ($this->expandlinks)
            $this->results[$idx] = $this->_expandlinks($this->results[$idx], $base);
        $idx++;
    }
    return true;
}
|
---|
| 576 |
|
---|
| 577 |
|
---|
| 578 |
|
---|
| 579 | /*======================================================================*\
|
---|
| 580 | Function: set_submit_multipart
|
---|
| 581 | Purpose: Set the form submission content type to
|
---|
| 582 | multipart/form-data
|
---|
| 583 | \*======================================================================*/
|
---|
/**
 * Switch the form submission content type to multipart/form-data.
 *
 * submit() passes $this->_submit_type on as the content type of the request.
 */
function set_submit_multipart()
{
    $this->_submit_type = "multipart/form-data";
}
|
---|
| 588 |
|
---|
| 589 |
|
---|
| 590 | /*======================================================================*\
|
---|
| 591 | Function: set_submit_normal
|
---|
| 592 | Purpose: Set the form submission content type to
|
---|
| 593 | application/x-www-form-urlencoded
|
---|
| 594 | \*======================================================================*/
|
---|
/**
 * Switch the form submission content type back to
 * application/x-www-form-urlencoded (the class default).
 */
function set_submit_normal()
{
    $this->_submit_type = "application/x-www-form-urlencoded";
}
|
---|
| 599 |
|
---|
| 600 |
|
---|
| 601 |
|
---|
| 602 |
|
---|
| 603 | /*======================================================================*\
|
---|
| 604 | Private functions
|
---|
| 605 | \*======================================================================*/
|
---|
| 606 |
|
---|
| 607 |
|
---|
| 608 | /*======================================================================*\
|
---|
| 609 | Function: _striplinks
|
---|
| 610 | Purpose: strip the hyperlinks from an html document
|
---|
| 611 | Input: $document document to strip.
|
---|
| 612 | Output: $match an array of the links
|
---|
| 613 | \*======================================================================*/
|
---|
| 614 |
|
---|
/**
 * Extract the href targets of all <a> tags in an html document.
 *
 * Quoted href values are returned first (in document order), followed by
 * unquoted ones. Values that are empty() -- including the string "0" --
 * are skipped.
 *
 * @param string $document document to strip
 * @return array the links found; an empty array when there are none
 */
function _striplinks($document)
{
    // "x" modifier: whitespace and #-comments inside the pattern are ignored
    preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx",$document,$links);

    // Always hand back an array, even when nothing matched.
    $match = array();

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones
    foreach(array(2, 3) as $group)
    {
        foreach($links[$group] as $val)
        {
            if(!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
|
---|
| 641 |
|
---|
| 642 | /*======================================================================*\
|
---|
| 643 | Function: _stripform
|
---|
| 644 | Purpose: strip the form elements from an html document
|
---|
| 645 | Input: $document document to strip.
|
---|
| 646 | Output: $match the matched form elements, joined by CRLF
|
---|
| 647 | \*======================================================================*/
|
---|
| 648 |
|
---|
/**
 * Reduce an html document to its form-related tags.
 *
 * Matches opening/closing FORM, INPUT, SELECT, TEXTAREA and OPTION tags
 * (for OPTION, the text following the tag is included as well).
 *
 * @param string $document document to strip
 * @return string the matched elements joined by "\r\n"
 */
function _stripform($document)
{
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
    preg_match_all($pattern, $document, $found);

    // index 0 holds the full-pattern matches
    return implode("\r\n", $found[0]);
}
|
---|
| 659 |
|
---|
| 660 |
|
---|
| 661 |
|
---|
| 662 | /*======================================================================*\
|
---|
| 663 | Function: _striptext
|
---|
| 664 | Purpose: strip the text from an html document
|
---|
| 665 | Input: $document document to strip.
|
---|
| 666 | Output: $text the resulting text
|
---|
| 667 | \*======================================================================*/
|
---|
| 668 |
|
---|
/**
 * Strip the markup from an html document and decode common entities.
 *
 * Rules are applied in the order listed: script blocks, then tags, then
 * whitespace following a line break, then entities.
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
function _striptext($document)
{
    // Entities are listed one by one (no /e evaluation); only some of the
    // more common ones are covered. Pattern => replacement, applied in order.
    // NOTE(review): the chr() replacements are single bytes (presumably
    // Latin-1 / Windows-1252) -- confirm against the encoding of this file.
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",         // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",         // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",      // strip out white space
        "'&(quot|#34|#034|#x22);'i"      => "\"",       // replace html entities
        "'&(amp|#38|#038|#x26);'i"       => "&",        // added hexadecimal values
        "'&(lt|#60|#060|#x3c);'i"        => "<",
        "'&(gt|#62|#062|#x3e);'i"        => ">",
        "'&(nbsp|#160|#xa0);'i"          => " ",
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169),
        "'&(reg|#174);'i"                => chr(174),
        "'&(deg|#176);'i"                => chr(176),
        "'&(#39|#039|#x27);'"            => chr(39),
        "'&(euro|#8364);'i"              => chr(128),   // europe
        "'&a(uml|UML);'"                 => "ä",        // german
        "'&o(uml|UML);'"                 => "ö",
        "'&u(uml|UML);'"                 => "ü",
        "'&A(uml|UML);'"                 => "Ä",
        "'&O(uml|UML);'"                 => "Ö",
        "'&U(uml|UML);'"                 => "Ü",
        // match the entity, not the literal character (which was a no-op)
        "'&szlig;'i"                     => "ß",
    );

    return preg_replace(array_keys($rules), array_values($rules), $document);
}
|
---|
| 729 |
|
---|
| 730 | /*======================================================================*\
|
---|
| 731 | Function: _expandlinks
|
---|
| 732 | Purpose: expand each link into a fully qualified URL
|
---|
| 733 | Input: $links the links to qualify
|
---|
| 734 | $URI the full URI to get the base from
|
---|
| 735 | Output: $expandedLinks the expanded links
|
---|
| 736 | \*======================================================================*/
|
---|
| 737 |
|
---|
/**
 * Expand each link into a fully qualified URL.
 *
 * The base is derived from $URI by dropping the query string, a trailing
 * "name.ext" path segment and a trailing slash. Links starting with "/"
 * are resolved against scheme://host of the base; links that are neither
 * absolute (http:// or https://) nor mailto: are resolved against the base
 * itself. "/./" and "/dir/../" segments are collapsed once.
 *
 * @param string|array $links the link(s) to qualify
 * @param string       $URI   the full URI to get the base from
 * @return string|array the expanded link(s), same shape as $links
 */
function _expandlinks($links,$URI)
{
    // everything up to (not including) the query string
    preg_match("/^[^\?]+/",$URI,$match);
    $base = isset($match[0]) ? $match[0] : '';

    // drop a trailing "file.ext" segment, then a trailing slash
    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$base);
    $base = preg_replace("|/$|","",$base);

    $base_part = parse_url($base);
    $base_root =
        (isset($base_part["scheme"]) ? $base_part["scheme"] : '')."://".
        (isset($base_part["host"]) ? $base_part["host"] : '');

    $search = array(    "|^http://".preg_quote($this->host)."|i",
                        "|^(\/)|i",
                        // https:// links are absolute too; without this
                        // they would get the base URL prepended
                        "|^(?!https?://)(?!mailto:)|i",
                        "|/\./|",
                        "|/[^\/]+/\.\./|"
                    );

    $replace = array(   "",
                        $base_root."/",
                        $base."/",
                        "/",
                        "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
|
---|
| 767 |
|
---|
| 768 | /*======================================================================*\
|
---|
| 769 | Function: _httprequest
|
---|
| 770 | Purpose: go get the http data from the server
|
---|
| 771 | Input: $url the url to fetch
|
---|
| 772 | $fp the current open file pointer
|
---|
| 773 | $URI the full URI
|
---|
| 774 | $body body contents to send if any (POST)
|
---|
| 775 | Output: true on success, false on read timeout (status is set to -100)
|
---|
| 776 | \*======================================================================*/
|
---|
| 777 |
|
---|
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    // Purpose: write one HTTP request to the open socket $fp, then read the
    //          response headers and body and record them on this object.
    // Input:   $url           request target for the request line ("/" if empty)
    //          $fp            open socket/stream handle to write to and read from
    //          $URI           the full URI; its scheme rebuilds host-less redirects
    //          $http_method   method placed on the request line
    //          $content_type  optional Content-type header value
    //          $body          optional request body (POST)
    // Output:  true on success; false when a read timed out (status = -100)
    // Sets:    headers, status, response_code, _redirectaddr, _frameurls,
    //          results, timed_out

    $cookie_headers = '';
    // on a redirect, carry over the cookies set by the previous response
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";

    /* ---- build the request head ---- */
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    // a caller-supplied raw Host header takes precedence over the generated one
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if(!empty($this->port) && $this->port != 80)
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if ( count($this->cookies) > 0 ) {
            $cookie_headers .= 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(): each() no longer exists in PHP 8 and,
        // because it relied on the array pointer, skipped these headers on
        // every request after the first (e.g. when following a redirect).
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    // blank line terminates the request head
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    /* ---- read the response head ---- */
    $this->_redirectaddr = false;
    // reset to an empty array (not unset) so later count()/foreach on the
    // header list stay valid even when the server sends nothing back
    $this->headers = array();

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // an empty line ends the header section
        if($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // Whitespace after the colon is optional, and an empty target is ignored.
        if(preg_match("/^(?:Location|URI):[ \t]*(.*)/i",chop($currentHeader),$matches) && $matches[1] !== "")
        {
            $target = $matches[1];
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        if(preg_match("|^HTTP/|",$currentHeader))
        {
            // second token of the status line is the numeric status
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    /* ---- read the response body until EOF ---- */
    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if ($_data === false || strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while(true);

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        foreach ($match[1] as $frame_src)
            $this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
|
---|
| 931 |
|
---|
| 932 | /*======================================================================*\
|
---|
| 933 | Function: _httpsrequest
|
---|
| 934 | Purpose: go get the https data from the server using curl
|
---|
| 935 | Input: $url the url to fetch
|
---|
| 936 | $URI the full URI
|
---|
| 937 | $body body contents to send if any (POST)
|
---|
| 938 | Output: true on success, false on failure ($this->error is set)
|
---|
| 939 | \*======================================================================*/
|
---|
| 940 |
|
---|
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // Purpose: fetch an https URI by running the external curl binary
    //          ($this->curl_path) and record the response on this object.
    // Input:   $url           request path (only normalised here; curl gets $URI)
    //          $URI           the full URI handed to curl
    //          $http_method   unused by this implementation (curl picks the method)
    //          $content_type  optional Content-type header value
    //          $body          optional request body, sent with curl -d
    // Output:  true on success; false on failure, with $this->error set
    // Sets:    headers, status, response_code, _redirectaddr, _frameurls, results
    //
    // NOTE(review): curl runs with -k, so TLS certificates are NOT verified.
    // Kept as-is because callers may depend on it; confirm whether that is intended.

    // on a redirect, carry over the cookies set by the previous response
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";

    /* ---- collect request headers (request line not needed for curl) ---- */
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host)) {
        if(!empty($this->port))
            $headers[] = "Host: ".$this->host.":".$this->port;
        else
            $headers[] = "Host: ".$this->host;
    }
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if ( count($this->cookies) > 0 ) {
            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(), which no longer exists in PHP 8
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: Basic ".base64_encode($this->user.":".$this->pass);

    /* ---- build the curl command line ---- */
    // Every value that can carry caller- or server-supplied text goes through
    // escapeshellarg(): plain double quotes still let the shell expand
    // $(...), backticks and variables inside headers, body or URI.
    $cmdline_params = "";
    foreach ($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

    // presumably the class declares a temp_dir property (not visible from
    // here - TODO confirm); otherwise use the system temporary directory
    $temp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
    $headerfile = tempnam($temp_dir, "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    $results = array();
    exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        // do not leave the temp file behind on failure
        if(is_file($headerfile))
            unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    $results = implode("\r\n",$results);

    // curl dumped the response headers into the temp file; read, then remove it
    $result_headers = file($headerfile);
    if($result_headers === false)
        $result_headers = array();
    if(is_file($headerfile))
        unlink($headerfile);

    $this->_redirectaddr = false;
    // reset to an empty array (not unset) so later count()/foreach stay valid
    $this->headers = array();

    foreach ($result_headers as $header_line)
    {
        // if a header begins with Location: or URI:, set the redirect.
        // Whitespace after the colon is optional, and an empty target is ignored.
        if(preg_match("/^(?:Location|URI):[ \t]*(.*)/i",chop($header_line),$matches) && $matches[1] !== "")
        {
            $target = $matches[1];
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        if(preg_match("|^HTTP/|",$header_line))
        {
            // record the numeric status the same way _httprequest does
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$header_line, $status))
                $this->status = $status[1];
            $this->response_code = $header_line;
        }

        $this->headers[] = $header_line;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        foreach ($match[1] as $frame_src)
            $this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
|
---|
| 1079 |
|
---|
| 1080 | /*======================================================================*\
|
---|
| 1081 | Function: setcookies()
|
---|
| 1082 | Purpose: set cookies for a redirection
|
---|
| 1083 | \*======================================================================*/
|
---|
| 1084 |
|
---|
function setcookies()
{
    // Purpose: copy every Set-Cookie name/value pair found in the last
    //          response headers into $this->cookies, so the next request
    //          (e.g. a redirect) sends them back.
    // Input:   none (reads $this->headers)
    // Output:  none (updates $this->cookies)

    // nothing to do when no response headers have been recorded yet
    if (empty($this->headers) || !is_array($this->headers))
        return;

    foreach ($this->headers as $header)
    {
        // Name is everything up to "=", value everything up to ";" or the end
        // of the line. CR/LF are excluded so a cookie without attributes
        // ("Set-Cookie: a=b\r\n") does not keep the line ending in its value.
        if(preg_match('/^set-cookie:[\s]*([^=]+)=([^;\r\n]+)/i', $header, $match))
            $this->cookies[$match[1]] = urldecode(rtrim($match[2]));
    }
}
|
---|
| 1093 |
|
---|
| 1094 |
|
---|
| 1095 | /*======================================================================*\
|
---|
| 1096 | Function: _check_timeout
|
---|
| 1097 | Purpose: checks whether timeout has occurred
|
---|
| 1098 | Input: $fp file pointer
|
---|
| 1099 | \*======================================================================*/
|
---|
| 1100 |
|
---|
function _check_timeout($fp)
{
    // Purpose: report whether the stream behind $fp has timed out.
    // Input:   $fp  file pointer
    // Output:  true (and $this->timed_out set to true) when a read timeout is
    //          configured and the stream status says it timed out; else false

    // timeouts are only tracked when a positive read timeout is configured
    if (!($this->read_timeout > 0))
        return false;

    $stream_state = socket_get_status($fp);
    if (!$stream_state["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
|
---|
| 1112 |
|
---|
| 1113 | /*======================================================================*\
|
---|
| 1114 | Function: _connect
|
---|
| 1115 | Purpose: make a socket connection
|
---|
| 1116 | Input: $fp file pointer
|
---|
| 1117 | \*======================================================================*/
|
---|
| 1118 |
|
---|
function _connect(&$fp)
{
    // Purpose: open a socket to the target host, or to the proxy when both
    //          proxy_host and proxy_port are set.
    // Input:   $fp  passed by reference; receives the opened socket
    // Output:  true on success; false on failure, with $this->status set to
    //          the fsockopen error number and $this->error to a message

    if(!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch($errno)
    {
        // each case needs its own break; without it every error fell
        // through to the generic default message
        case -3:
            $this->error="socket creation failed (-3)";
            break;
        case -4:
            $this->error="dns lookup failure (-4)";
            break;
        case -5:
            $this->error="connection refused or timed out (-5)";
            break;
        default:
            $this->error="connection failed (".$errno.")";
            break;
    }
    return false;
}
|
---|
| 1166 | /*======================================================================*\
|
---|
| 1167 | Function: _disconnect
|
---|
| 1168 | Purpose: disconnect a socket connection
|
---|
| 1169 | Input: $fp file pointer
|
---|
| 1170 | \*======================================================================*/
|
---|
| 1171 |
|
---|
function _disconnect($fp)
{
    // Purpose: close a socket connection.
    // Input:   $fp  file pointer
    // Output:  the result of fclose() on $fp
    $closed = fclose($fp);

    return $closed;
}
|
---|
| 1176 |
|
---|
| 1177 |
|
---|
| 1178 | /*======================================================================*\
|
---|
| 1179 | Function: _prepare_post_body
|
---|
| 1180 | Purpose: Prepare post body according to encoding type
|
---|
| 1181 | Input: $formvars - form variables
|
---|
| 1182 | $formfiles - form upload files
|
---|
| 1183 | Output: post body
|
---|
| 1184 | \*======================================================================*/
|
---|
| 1185 |
|
---|
function _prepare_post_body($formvars, $formfiles)
{
    // Purpose: build the POST body for the encoding in $this->_submit_type.
    // Input:   $formvars   form variables (name => value, or name => list of values)
    //          $formfiles  upload files (field name => file name or list of file names)
    // Output:  the post body as a string; empty string when there is nothing
    //          to send or the submit type is not one of the two handled here
    // Sets:    $this->_mime_boundary (multipart/form-data only)

    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0)
        return $postdata;

    // foreach replaces the each() loops throughout: each() no longer exists in PHP 8
    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    // multi-valued field: name[]=v1&name[]=v2&...
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // concatenate "[]": the former "$key\[\]" put literal
                        // backslashes into the field name
                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    // unreadable files are skipped silently
                    if (!is_readable($file_name)) continue;

                    // binary-safe read
                    $fp = fopen($file_name, "rb");
                    if ($fp === false) continue;
                    // fread() rejects a length of 0, so an empty file is
                    // sent as an empty part instead
                    $file_size = filesize($file_name);
                    $file_content = ($file_size > 0) ? fread($fp, $file_size) : "";
                    fclose($fp);
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            // closing boundary
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
|
---|
| 1248 | }
|
---|
| 1249 | endif;
|
---|
| 1250 | ?>
|
---|