<?php
# Relative URI static class: PHP class for resolving relative URLs
#
# This class is derived (under the terms of the GPL) from URL Class 0.3 by
# Keyvan Minoukadeh <keyvan@k1m.com>, which is great but more than we need
# for FeedWordPress's purposes. The class has been stripped down to a single
# public method: Relative_URI::resolve($url, $base), which resolves the URI in
# $url relative to the URI in $base

class Relative_URI
{
	/**
	 * Resolve the URI reference in $url against the base URI in $base.
	 *
	 * Both arguments are trimmed and percent-encoded with _encode() before
	 * being parsed. Resolution follows RFC 3986 Section 5.2 (the successor
	 * of RFC 2396 Section 5.2):
	 *
	 *  - an empty or fragment-only reference refers to the base document;
	 *  - a reference with a scheme is already absolute and is returned as is;
	 *  - a network-path reference ("//host/path") keeps only the base scheme;
	 *  - a query-only reference ("?q") keeps the base path;
	 *  - anything else is merged with the base path and has its "." and
	 *    "<segment>/.." parts removed.
	 *
	 * @param string      $url  URI reference to resolve
	 * @param string|null $base base URI; when NULL, it is built from
	 *                          'http://', $_SERVER['HTTP_HOST'] and
	 *                          $_SERVER['REQUEST_URI']
	 * @return string the resolved (percent-encoded) URI
	 */
	public static function resolve($url, $base = NULL)
	{
		if (is_null($base)) {
			$base = 'http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI'];
		}

		$base = self::_encode(trim((string) $base));
		$uri_parts = self::_parse_url($base);

		$url = self::_encode(trim((string) $url));
		$parts = self::_parse_url($url);

		// If path is empty, and scheme, host, and query are undefined,
		// the reference points at the base document itself.
		if (($parts['path'] === '') && !isset($parts['scheme']) && !isset($parts['host']) && !isset($parts['query'])) {
			if (!isset($parts['fragment'])) {
				return $base;
			}
			// Replace (rather than append to) any fragment the base carries,
			// so that we never produce "base#old#new".
			$hash = strpos($base, '#');
			$stem = ($hash === false) ? $base : (string) substr($base, 0, $hash);
			return $stem.'#'.$parts['fragment'];
		}

		// If the scheme is set, then the URI is absolute.
		if (isset($parts['scheme'])) {
			return $url;
		}

		// From here on the query and fragment always come from the reference.
		$uri_parts['query'] = (isset($parts['query']) ? $parts['query'] : null);
		$uri_parts['fragment'] = (isset($parts['fragment']) ? $parts['fragment'] : null);

		if (isset($parts['host'])) {
			// Network-path reference: the whole authority is replaced, so the
			// base's user, password and port must not leak into the result.
			unset($uri_parts['user'], $uri_parts['pass'], $uri_parts['port']);
			foreach (array('user', 'pass', 'host', 'port') as $key) {
				if (isset($parts[$key])) {
					$uri_parts[$key] = $parts[$key];
				}
			}
			$uri_parts['path'] = $parts['path'];
		} elseif ($parts['path'] === '') {
			// Query-only reference: keep the base path untouched. A base with
			// a host but no path gets "/" so the result stays well-formed.
			if (($uri_parts['path'] === '') && !empty($uri_parts['host'])) {
				$uri_parts['path'] = '/';
			}
		} else {
			// We have a relative (or root-relative) path but not a host.
			$uri_parts['path'] = self::_merge_path(
				!empty($uri_parts['host']),
				$uri_parts['path'],
				$parts['path']
			);
		}

		return self::_build($uri_parts);
	}

	/**
	 * Merge a non-empty reference path with the base path.
	 *
	 * @param bool   $base_has_host whether the base URI has a non-empty host
	 * @param string $base_path     path component of the base URI
	 * @param string $rel_path      path component of the reference
	 * @return string the merged path with dot segments removed
	 */
	private static function _merge_path($base_has_host, $base_path, $rel_path)
	{
		// Base is "scheme://host" with no path at all: treat the reference
		// as if it were relative to "/".
		if ($base_has_host && ($base_path === '') && ($rel_path !== '') && ($rel_path[0] !== '/')) {
			$rel_path = '/'.$rel_path;
		}

		// A root-relative path replaces the base path outright.
		if (strncmp($rel_path, '/', 1) === 0) {
			return $rel_path;
		}

		// Copy the base path up to and including its last (right-most) slash.
		// A base path without any slash contributes nothing.
		$slash = strrpos($base_path, '/');
		$buffer = (($slash === false) ? '' : substr($base_path, 0, $slash + 1)).$rel_path;

		// Remove "./" where "." is a complete path segment. str_replace()
		// does not rescan its own output, so "/././" needs several passes.
		do {
			$buffer = str_replace('/./', '/', $buffer, $removed);
		} while ($removed > 0);
		if (substr($buffer, 0, 2) === './') {
			$buffer = (string) substr($buffer, 2);
		}
		// If the buffer ends with "." as a complete path segment, remove it.
		if (substr($buffer, -2) === '/.') {
			$buffer = substr($buffer, 0, -1);
		}

		// Remove "<segment>/../" where <segment> is a complete path segment
		// not equal to "..". After each removal the scan restarts from the
		// beginning so that newly adjacent pairs are found as well.
		$segments = explode('/', $buffer);
		$i = 0;
		while ($i + 1 < count($segments)) {
			if (($segments[$i] !== '') && ($segments[$i] !== '..') && ($segments[$i + 1] === '..')) {
				// When ".." is the final segment, keep a trailing slash.
				if ($i + 2 === count($segments)) {
					$segments[] = '';
				}
				array_splice($segments, $i, 2);
				$i = 0;
			} else {
				$i++;
			}
		}

		// Everything was consumed: fall back to the root path.
		if ((count($segments) === 1) && ($segments[0] === '')) {
			return '/';
		}
		return implode('/', $segments);
	}

	/**
	 * Reassemble a URI from the component array produced by _parse_url().
	 *
	 * @param array $uri_parts components: scheme, user, pass, host, port,
	 *                         path, query, fragment (all but path optional)
	 * @return string
	 */
	private static function _build($uri_parts)
	{
		$ret = '';
		if (isset($uri_parts['scheme'])) {
			$ret .= $uri_parts['scheme'].':';
		}
		if (isset($uri_parts['host'])) {
			// The authority is "//" [user[:pass]@] host [:port], in that order.
			$ret .= '//';
			if (isset($uri_parts['user'])) {
				$ret .= $uri_parts['user'];
				if (isset($uri_parts['pass'])) {
					$ret .= ':'.$uri_parts['pass'];
				}
				$ret .= '@';
			}
			$ret .= $uri_parts['host'];
			if (isset($uri_parts['port'])) {
				$ret .= ':'.$uri_parts['port'];
			}
		}
		$ret .= $uri_parts['path'];
		if (isset($uri_parts['query'])) {
			$ret .= '?'.$uri_parts['query'];
		}
		if (isset($uri_parts['fragment'])) {
			$ret .= '#'.$uri_parts['fragment'];
		}
		return $ret;
	}

	/**
	 * Parse URL
	 *
	 * Regular expression grabbed from RFC 2396 Appendix B.
	 * This is a replacement for PHP's built-in parse_url(), which generates
	 * a warning for a few strings.
	 *
	 * The result always has a 'path' key. 'scheme', 'user', 'pass', 'host',
	 * 'query' and 'fragment' are present only when the URL contains them.
	 * 'port' is an int when a positive port number follows the host, and
	 * NULL when a ":" is followed by something that is not a positive number.
	 *
	 * @param string $url
	 * @access private
	 * @return array
	 */
	public static function _parse_url($url)
	{
		if (!preg_match('!^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?!', (string) $url, $match)) {
			// shouldn't reach here: every sub-pattern is optional
			return array('path' => '');
		}

		$parts = array();
		if ($match[1] !== '') {
			$parts['scheme'] = $match[2];
		}

		// parse authority: [user[:pass]@]host[:port]
		if ($match[3] !== '') {
			$authority = $match[4];

			// store user info
			$at_pos = strpos($authority, '@');
			if ($at_pos !== false) {
				$userinfo = explode(':', (string) substr($authority, 0, $at_pos), 2);
				$parts['user'] = $userinfo[0];
				if (isset($userinfo[1])) {
					$parts['pass'] = $userinfo[1];
				}
				$authority = (string) substr($authority, $at_pos + 1);
			}

			// get port number; a colon in first position is left in the host
			$port_pos = strrpos($authority, ':');
			if (($port_pos !== false) && ($port_pos > 0)) {
				$parts['host'] = substr($authority, 0, $port_pos);
				$port = (int) substr($authority, $port_pos + 1);
				$parts['port'] = ($port < 1) ? null : $port;
			} else {
				$parts['host'] = $authority;
			}
		}

		$parts['path'] = $match[5];
		// Trailing groups that did not participate are absent from $match.
		if (isset($match[6]) && ($match[6] !== '')) {
			$parts['query'] = $match[7];
		}
		if (isset($match[8]) && ($match[8] !== '')) {
			$parts['fragment'] = $match[9];
		}
		return $parts;
	}

	/**
	 * Percent-encode every byte that may not appear literally in a URI.
	 *
	 * Encoded bytes are: control characters (0-31), space, " < > { | } [ \ ]
	 * ^ ` DEL (127), and every byte from 128 to 255 (so multi-byte UTF-8
	 * characters are preserved as escapes instead of being dropped). "%" is
	 * deliberately left alone so that existing escapes are not encoded
	 * twice. Hex digits are lower-case, as produced by bin2hex().
	 *
	 * @param string $string
	 * @access private
	 * @return string a string containing only bytes in the range 0x21-0x7E
	 */
	public static function _encode($string)
	{
		// The translation table is built once and reused on later calls.
		static $replace = array();
		if (!count($replace)) {
			$codes = array_merge(
				range(0, 31),
				array(32, 34, 60, 62, 123, 124, 125, 91, 92, 93, 94, 96, 127),
				range(128, 255)
			);
			foreach ($codes as $code) {
				$char = chr($code);
				$replace[$char] = '%'.bin2hex($char);
			}
		}
		return strtr((string) $string, $replace);
	}
} // class Relative_URI
|
---|
| 174 |
|
---|