1 | <?php
|
---|
# Relative URI static class: PHP class for resolving relative URLs
#
# This class is derived (under the terms of the GPL) from URL Class 0.3 by
# Keyvan Minoukadeh <keyvan@k1m.com>, which is great but more than we need
# for FeedWordPress's purposes. The class has been stripped down to a single
# public method: Relative_URI::resolve($url, $base), which resolves the URI in
# $url relative to the URI in $base.
9 |
|
---|
class Relative_URI
{
	/**
	 * Resolve the relative URI in $url against the base URI in $base.
	 *
	 * Intended to follow RFC 2396 Section 5.2. Both arguments are trimmed
	 * and passed through _encode() before being parsed, so unsafe ASCII
	 * characters are percent-escaped and non-ASCII bytes are dropped.
	 *
	 * @param string      $url  URI reference to resolve (absolute or relative)
	 * @param string|null $base Base URI; when NULL, 'http://' followed by
	 *                          $_SERVER['HTTP_HOST'] and
	 *                          $_SERVER['REQUEST_URI'] is used
	 * @return string the resolved URI
	 */
	public static function resolve ($url, $base = NULL)
	{
		if (is_null($base)):
			$base = 'http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI'];
		endif;

		$base = Relative_URI::_encode(trim($base));
		$uri_parts = Relative_URI::_parse_url($base);

		$url = Relative_URI::_encode(trim($url));
		$parts = Relative_URI::_parse_url($url);

		// Query and fragment always come from the reference, never the base.
		$uri_parts['fragment'] = (isset($parts['fragment']) ? $parts['fragment'] : null);
		$uri_parts['query'] = (isset($parts['query']) ? $parts['query'] : null);

		// if path is empty, and scheme, host, and query are undefined,
		// the URL is referring the base URL
		if (($parts['path'] === '') && !isset($parts['scheme']) && !isset($parts['host']) && !isset($parts['query'])) {
			// The URI is empty or only a fragment: return the base URI,
			// replacing (not appending to) any fragment the base carried.
			$hash_pos = strpos($base, '#');
			if ($hash_pos !== false) {
				$base = substr($base, 0, $hash_pos);
			}
			return $base . (isset($parts['fragment']) ? '#'.$parts['fragment'] : '');
		} elseif (isset($parts['scheme'])) {
			// If the scheme is set, then the URI is absolute.
			return $url;
		} elseif (isset($parts['host'])) {
			// Network-path reference ("//host/path"): the whole authority
			// (user, password, host, port) and the path come from the
			// reference; only the scheme is inherited from the base.
			unset($uri_parts['user'], $uri_parts['pass'], $uri_parts['port']);
			foreach (array('user', 'pass', 'host', 'port') as $key) {
				if (isset($parts[$key])) {
					$uri_parts[$key] = $parts[$key];
				}
			}
			$uri_parts['path'] = $parts['path'];
		} else {
			// We have a relative path but not a host.

			// start ugly fix:
			// prepend slash to path if base host is set, base path is not set, and url path is not absolute
			if (!empty($uri_parts['host']) && ($uri_parts['path'] === '')
			&& (strlen($parts['path']) > 0)
			&& (substr($parts['path'], 0, 1) != '/')) {
				$parts['path'] = '/'.$parts['path'];
			} // end ugly fix

			if (substr($parts['path'], 0, 1) == '/') {
				$uri_parts['path'] = $parts['path'];
			} else {
				// copy base path excluding any characters after the last (right-most) slash character;
				// a base path containing no slash contributes nothing
				$last_slash = strrpos($uri_parts['path'], '/');
				$buffer = ($last_slash === false) ? '' : substr($uri_parts['path'], 0, $last_slash + 1);
				// append relative path
				$buffer .= $parts['path'];
				// remove "./" where "." is a complete path segment; repeat
				// because one str_replace pass misses overlapping "/././"
				while (strpos($buffer, '/./') !== false) {
					$buffer = str_replace('/./', '/', $buffer);
				}
				if (substr($buffer, 0, 2) == './') {
					$buffer = substr($buffer, 2);
				}
				// if buffer ends with "." as a complete path segment, remove it
				if (substr($buffer, -2) == '/.') {
					$buffer = substr($buffer, 0, -1);
				}
				// remove "<segment>/../" where <segment> is a complete path segment not equal to ".."
				$search_finished = false;
				$segment = explode('/', $buffer);
				while (!$search_finished) {
					for ($x=0; $x+1 < count($segment);) {
						if (($segment[$x] != '') && ($segment[$x] != '..') && ($segment[$x+1] == '..')) {
							// a trailing "<segment>/.." must leave a trailing slash behind
							if ($x+2 == count($segment)) $segment[] = '';
							unset($segment[$x], $segment[$x+1]);
							$segment = array_values($segment);
							// restart the scan from the beginning of the path
							continue 2;
						} else {
							$x++;
						}
					}
					$search_finished = true;
				}
				$buffer = (count($segment) == 1) ? '/' : implode('/', $segment);
				$uri_parts['path'] = $buffer;
			}
		}

		// If we've gotten to this point, we can try to put the pieces
		// back together: scheme ":" "//" [user[":"pass]"@"] host [":"port]
		// path ["?"query] ["#"fragment]
		$ret = '';
		if (isset($uri_parts['scheme'])) $ret .= $uri_parts['scheme'].':';
		if (isset($uri_parts['host'])) {
			$ret .= '//';
			// userinfo belongs inside the authority, after the "//"
			if (isset($uri_parts['user'])) {
				$ret .= $uri_parts['user'];
				if (isset($uri_parts['pass'])) $ret .= ':'.$uri_parts['pass'];
				$ret .= '@';
			}
			$ret .= $uri_parts['host'];
			if (isset($uri_parts['port'])) $ret .= ':'.$uri_parts['port'];
		}
		$ret .= $uri_parts['path'];
		if (isset($uri_parts['query'])) $ret .= '?'.$uri_parts['query'];
		if (isset($uri_parts['fragment'])) $ret .= '#'.$uri_parts['fragment'];

		return $ret;
	}

	/**
	 * Parse URL
	 *
	 * Regular expression grabbed from RFC 2396 Appendix B.
	 * This is a replacement for PHP's built-in parse_url().
	 *
	 * The returned array always has a 'path' key (possibly ''); the keys
	 * 'scheme', 'user', 'pass', 'host', 'port', 'query' and 'fragment' are
	 * present only when the corresponding component appears in $url. A port
	 * that does not parse to a positive integer is stored as null.
	 *
	 * @param string $url
	 * @access private
	 * @return array
	 */
	public static function _parse_url($url)
	{
		// I'm using this pattern instead of parse_url() as there's a few strings where parse_url()
		// generates a warning.
		if (preg_match('!^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?!', $url, $match)) {
			$parts = array();
			if ($match[1] != '') $parts['scheme'] = $match[2];
			if ($match[3] != '') $parts['auth'] = $match[4];
			// parse auth
			if (isset($parts['auth'])) {
				// store user info
				if (($at_pos = strpos($parts['auth'], '@')) !== false) {
					$userinfo = explode(':', substr($parts['auth'], 0, $at_pos), 2);
					$parts['user'] = $userinfo[0];
					if (isset($userinfo[1])) $parts['pass'] = $userinfo[1];
					$parts['auth'] = substr($parts['auth'], $at_pos+1);
				}
				// get port number; a colon at position 0 (or none at all)
				// means the whole remainder is treated as the host
				if ($port_pos = strrpos($parts['auth'], ':')) {
					$parts['host'] = substr($parts['auth'], 0, $port_pos);
					$parts['port'] = (int)substr($parts['auth'], $port_pos+1);
					if ($parts['port'] < 1) $parts['port'] = null;
				} else {
					$parts['host'] = $parts['auth'];
				}
			}
			// 'auth' is only a scratch entry; it is not part of the result
			unset($parts['auth']);
			$parts['path'] = $match[5];
			// trailing unmatched groups are absent from $match, hence isset()
			if (isset($match[6]) && ($match[6] != '')) $parts['query'] = $match[7];
			if (isset($match[8]) && ($match[8] != '')) $parts['fragment'] = $match[9];
			return $parts;
		}
		// shouldn't reach here
		return array('path'=>'');
	}

	/**
	 * Percent-escape characters that are unsafe in a URI and drop the rest.
	 *
	 * Control characters (0-31), space, and the characters
	 * " < > { | } [ \ ] ^ ` and DEL are replaced by '%' followed by their
	 * two-digit lowercase hex code. Any byte still outside 0x21-0x7E after
	 * that (i.e. non-ASCII bytes) is removed.
	 *
	 * @param string $string
	 * @access private
	 * @return string
	 */
	public static function _encode($string)
	{
		// translation table is built once and reused across calls
		static $replace = array();
		if (!count($replace)) {
			$find = array(32, 34, 60, 62, 123, 124, 125, 91, 92, 93, 94, 96, 127);
			$find = array_merge(range(0, 31), $find);
			$find = array_map('chr', $find);
			foreach ($find as $char) {
				$replace[$char] = '%'.bin2hex($char);
			}
		}
		// escape control characters and a few other characters
		$encoded = strtr($string, $replace);
		// remove any character outside the hex range: 21 - 7E (see www.asciitable.com)
		return preg_replace('/[^\x21-\x7e]/', '', $encoded);
	}
} // class Relative_URI
|
---|
174 |
|
---|