<?php
/* Copyright (C) 2008-2020	Laurent Destailleur			<eldy@users.sourceforge.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 * or see https://www.gnu.org/
 */

/**
 *	\file			htdocs/core/lib/geturl.lib.php
 *	\brief			This file contains functions dedicated to get URLs.
 */

/**
 * Function to get a content from an URL (use proxy if proxy defined).
 * Support Dolibarr setup for timeout and proxy.
 * Enhancement of CURL to add an anti SSRF protection:
 * - you can set MAIN_SECURITY_ANTI_SSRF_SERVER_IP to set static ip of server (comma separated list)
 * - common local lookup ips like 127.*.*.* are automatically added
 *
 * Redirections are never delegated to curl: they are followed by this function (at most 5 hops),
 * so that every hop goes through the same host/IP checks as the initial URL.
 *
 * @param	string	  $url 				    URL to call.
 * @param	string    $postorget		    'POST', 'GET', 'HEAD', 'PUT', 'PUTALREADYFORMATED', 'POSTALREADYFORMATED', 'DELETE'
 * @param	string    $param			    Parameters of URL (x=value1&y=value2) or may be a formated content with $postorget='PUTALREADYFORMATED'
 * @param	integer   $followlocation		0=Do not follow, 1=Follow location.
 * @param	string[]  $addheaders			Array of string to add into header. Example: ('Accept: application/xrds+xml', ....)
 * @param	string[]  $allowedschemes		List of schemes that are allowed ('http' + 'https' only by default)
 * @param	int		  $localurl				0=Only external URL are possible, 1=Only local URL, 2=Both external and local URL are allowed.
 * @return	array						    Returns an associative array containing the response from the server.
 *                                          On curl error: array('content'=>response, 'curl_error_no'=>errno, 'curl_error_msg'=>errmsg).
 *                                          Otherwise: the keys of curl_getinfo() (among them 'http_code') plus 'content' (when not empty),
 *                                          'curl_error_no'=>'' and 'curl_error_msg'=>''.
 *                                          When the URL is refused by the anti SSRF checks: 'http_code'=>400 and 'content'=>reason.
 */
function getURLContent($url, $postorget = 'GET', $param = '', $followlocation = 1, $addheaders = array(), $allowedschemes = array('http', 'https'), $localurl = 0)
{
	global $conf;

	$USE_PROXY = empty($conf->global->MAIN_PROXY_USE) ? 0 : $conf->global->MAIN_PROXY_USE;
	$PROXY_HOST = empty($conf->global->MAIN_PROXY_HOST) ? 0 : $conf->global->MAIN_PROXY_HOST;
	$PROXY_PORT = empty($conf->global->MAIN_PROXY_PORT) ? 0 : $conf->global->MAIN_PROXY_PORT;
	$PROXY_USER = empty($conf->global->MAIN_PROXY_USER) ? 0 : $conf->global->MAIN_PROXY_USER;
	$PROXY_PASS = empty($conf->global->MAIN_PROXY_PASS) ? 0 : $conf->global->MAIN_PROXY_PASS;

	// List of IPs declared as "IP of this server". Entries are trimmed so "1.2.3.4, 5.6.7.8" works as expected.
	$serveriplist = array();
	if (!empty($conf->global->MAIN_SECURITY_ANTI_SSRF_SERVER_IP)) {
		$serveriplist = array_map('trim', explode(',', $conf->global->MAIN_SECURITY_ANTI_SSRF_SERVER_IP));
	}

	dol_syslog("getURLContent postorget=".$postorget." URL=".$url." param=".$param);

	// Setting the curl parameters.
	$ch = curl_init();

	curl_setopt($ch, CURLOPT_VERBOSE, 1);
	curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function');

	// We use @ here because this may return warning if safe mode is on or open_basedir is on (following location is forbidden when safe mode is on).
	// We force value to false so we will manage redirection ourself later.
	@curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);

	if (is_array($addheaders) && count($addheaders)) {
		curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders);
	}
	curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve request header and log it

	// By default use the TLS version decided by PHP.
	// You can force, if supported, a version like TLSv1 or TLSv1.2 (value 6 is for TLS 1.2)
	if (!empty($conf->global->MAIN_CURL_SSLVERSION)) {
		curl_setopt($ch, CURLOPT_SSLVERSION, $conf->global->MAIN_CURL_SSLVERSION);
	}

	// Turning off the server and peer verification (TrustManager Concept).
	// NOTE(review): this disables TLS certificate validation for every call. Kept as is because callers may rely on it
	// (self-signed targets), but it should be reviewed.
	curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
	curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

	// Restrict use to some protocols only
	$protocols = 0;
	if (is_array($allowedschemes)) {
		foreach ($allowedschemes as $allowedscheme) {
			if ($allowedscheme == 'http') {
				$protocols |= CURLPROTO_HTTP;
			}
			if ($allowedscheme == 'https') {
				$protocols |= CURLPROTO_HTTPS;
			}
		}
		curl_setopt($ch, CURLOPT_PROTOCOLS, $protocols);
		curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, $protocols);
	}

	curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, empty($conf->global->MAIN_USE_CONNECT_TIMEOUT) ? 5 : $conf->global->MAIN_USE_CONNECT_TIMEOUT);
	curl_setopt($ch, CURLOPT_TIMEOUT, empty($conf->global->MAIN_USE_RESPONSE_TIMEOUT) ? 30 : $conf->global->MAIN_USE_RESPONSE_TIMEOUT);

	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // We want response
	if ($postorget == 'POST') {
		curl_setopt($ch, CURLOPT_POST, 1); // POST
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // Setting param x=a&y=z as POST fields
	} elseif ($postorget == 'POSTALREADYFORMATED') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); // HTTP request is 'POST' but param string is taken as it is
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
	} elseif ($postorget == 'PUT') {
		$array_param = null;
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
		if (!is_array($param)) {
			parse_str($param, $array_param);
		} else {
			dol_syslog("parameter param must be a string", LOG_WARNING);
			$array_param = $param;
		}
		curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields
	} elseif ($postorget == 'PUTALREADYFORMATED') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT'
		curl_setopt($ch, CURLOPT_POSTFIELDS, $param); // param = content of post, like a xml string
	} elseif ($postorget == 'HEAD') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'HEAD'); // HTTP request is 'HEAD'
		curl_setopt($ch, CURLOPT_NOBODY, true);
	} elseif ($postorget == 'DELETE') {
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE'); // HTTP request is 'DELETE'
	} else {
		curl_setopt($ch, CURLOPT_POST, 0); // GET
	}

	if ($USE_PROXY) {
		// The proxy password must never be written into the log: it is masked.
		dol_syslog("getURLContent set proxy to ".$PROXY_HOST.":".$PROXY_PORT." - ".$PROXY_USER.":".($PROXY_PASS ? '*****' : ''));
		curl_setopt($ch, CURLOPT_PROXY, $PROXY_HOST.":".$PROXY_PORT);
		if ($PROXY_USER) {
			curl_setopt($ch, CURLOPT_PROXYUSERPWD, $PROXY_USER.":".$PROXY_PASS);
		}
	}

	$newUrl = $url;
	$maxRedirection = 5;
	$info = array();
	$response = '';

	while ($maxRedirection >= 1) {
		curl_setopt($ch, CURLOPT_URL, $newUrl);

		// Parse $newUrl. parse_url() may return false or an array without 'host' on a malformed URL.
		$newUrlArray = parse_url($newUrl);
		if (!is_array($newUrlArray)) {
			$newUrlArray = array();
		}
		$hosttocheck = isset($newUrlArray['host']) ? $newUrlArray['host'] : '';
		$hosttocheck = str_replace(array('[', ']'), '', $hosttocheck); // Remove brackets of IPv6

		// Host names are case insensitive and may end with a dot: normalize before comparing with reserved names.
		$hostnormalized = rtrim(strtolower($hosttocheck), '.');

		$iptocheck = '0';
		$denyreason = '';

		if (in_array($hostnormalized, array('metadata.google.internal'))) {
			// Deny some reserved host names
			$denyreason = 'Error bad hostname '.$hosttocheck.' (Used by Google metadata). This value for hostname is not allowed.';
		} elseif ($hosttocheck !== '') {
			// Convert host name $hosttocheck into an IP $iptocheck
			if (in_array($hostnormalized, array('localhost', 'localhost.domain'))) {
				$iptocheck = '127.0.0.1';
			} elseif (in_array($hostnormalized, array('ip6-localhost', 'ip6-loopback'))) {
				$iptocheck = '::1';
			} elseif (function_exists('gethostbyname')) {
				// gethostbyname() resolves IPv4 only and returns its input unchanged on failure.
				// TODO Resolve ip v6
				$iptocheck = gethostbyname($hosttocheck);
			} else {
				$iptocheck = $hosttocheck;
			}

			// Check $iptocheck is an IP (v4 or v6), if not clear value so no IP based check is done.
			if (!filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4 | FILTER_FLAG_IPV6)) {
				$iptocheck = '0';
			}
		}

		if ($denyreason === '' && $iptocheck) {
			$ispublicip = (bool) filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE);

			if ($localurl == 0) {	// Only external url allowed (dangerous, may allow to get malware)
				if (!$ispublicip) {
					// Deny ips like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 0.0.0.0/8, 169.254.0.0/16, 127.0.0.0/8 et 240.0.0.0/4, ::1/128, ::/128, ::ffff:0:0/96, fe80::/10...
					$denyreason = 'Error bad hostname IP (private or reserved range). Must be an external URL.';
				} elseif (!empty($_SERVER["SERVER_ADDR"]) && $iptocheck == $_SERVER["SERVER_ADDR"]) {
					$denyreason = 'Error bad hostname IP (IP is a local IP). Must be an external URL.';
				} elseif (in_array($iptocheck, $serveriplist)) {
					$denyreason = 'Error bad hostname IP (IP is a local IP defined into MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be an external URL.';
				}
			} elseif ($localurl == 1) {	// Only local url allowed (dangerous, may allow to get metadata on server or make internal port scanning)
				if ($ispublicip) {
					// Deny ips NOT like 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 0.0.0.0/8, 169.254.0.0/16, 127.0.0.0/8 et 240.0.0.0/4, ::1/128, ::/128, ::ffff:0:0/96, fe80::/10...
					$denyreason = 'Error bad hostname '.$iptocheck.'. Must be a local URL.';
				} elseif (!empty($serveriplist) && !in_array($iptocheck, $serveriplist)) {
					$denyreason = 'Error bad hostname IP (IP is not a local IP defined into list MAIN_SECURITY_ANTI_SSRF_SERVER_IP). Must be a local URL in allowed list.';
				}
			}

			// Common check (local and external)
			if ($denyreason === '' && in_array($iptocheck, array('100.100.100.200'))) {
				$denyreason = 'Error bad hostname IP (Used by Alibaba metadata). Must be an external URL.';
			}
		}

		if ($denyreason !== '') {
			$info['http_code'] = 400;
			$info['content'] = $denyreason;
			// Drop the body of a previous redirect response, if any, so it can not replace the error message.
			$response = '';
			break;
		}

		// Set CURLOPT_CONNECT_TO so curl will not try another resolution that may give a different result. Possible only on PHP v7+
		if ($iptocheck && defined('CURLOPT_CONNECT_TO')) {
			// Format is HOST:PORT:CONNECT-TO-HOST:CONNECT-TO-PORT. An empty port means "any port"/"same port";
			// it must not be formatted with %d that would turn it into port 0 and make the entry never match.
			$porttouse = empty($newUrlArray['port']) ? '' : (string) ((int) $newUrlArray['port']);
			// An IPv6 target must be enclosed into brackets.
			$iptoconnect = filter_var($iptocheck, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) ? '['.$iptocheck.']' : $iptocheck;
			$connect_to = array(sprintf("%s:%s:%s:%s", $newUrlArray['host'], $porttouse, $iptoconnect, $porttouse));
			curl_setopt($ch, CURLOPT_CONNECT_TO, $connect_to);
		}

		// Getting response from server
		$response = curl_exec($ch);

		$info = curl_getinfo($ch); // Reading of request must be done after sending request
		$http_code = $info['http_code'];

		if ($followlocation && in_array($http_code, array(301, 302, 303, 307)) && !empty($info['redirect_url'])) {
			// Loop on the target of the redirect so it is checked like the initial URL.
			// TODO Use $info['local_ip'] and $info['primary_ip'] ?
			$newUrl = $info['redirect_url'];
			$maxRedirection--;
			continue;
		}

		break;
	}

	$request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request

	dol_syslog("getURLContent request=".$request);
	dol_syslog("getURLContent response size=".strlen((string) $response)); // Response may contain binary data, so we only log its size

	$rep = array();
	if (curl_errno($ch)) {
		// Add keys to $rep
		$rep['content'] = $response;

		// Moving to display page to display curl errors
		$rep['curl_error_no'] = curl_errno($ch);
		$rep['curl_error_msg'] = curl_error($ch);

		dol_syslog("getURLContent response array is ".join(',', $rep));
	} else {
		// Add keys to $rep
		$rep = $info;
		dol_syslog("getURLContent http_code=".$rep['http_code']);

		// Add more keys to $rep
		if ($response) {
			$rep['content'] = $response;
		}
		$rep['curl_error_no'] = '';
		$rep['curl_error_msg'] = '';
	}

	// Closing the curl
	curl_close($ch);

	return $rep;
}


/**
 * Function get second level domain name.
 * For example: https://www.abc.mydomain.com/dir/page.html return 'mydomain'
 *
 * @param	string	  $url 				    Full URL.
 * @param	int	 	  $mode					0=return 'mydomain', 1=return 'mydomain.com', 2=return 'abc.mydomain.com'
 * @return	string						    Returns domain name
 */
function getDomainFromURL($url, $mode = 0)
{
	$tmpdomain = preg_replace('/^https?:\/\//i', '', $url); // Remove http(s)://
	$tmpdomain = preg_replace('/\/.*$/i', '', $tmpdomain); // Remove part after domain
	if ($mode == 2) {
		$tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)\.([^\.]+)$/', '\1.\2.\3', $tmpdomain); // Remove part 'www.' before 'abc.mydomain.com'
	} else {
		$tmpdomain = preg_replace('/^.*\.([^\.]+)\.([^\.]+)$/', '\1.\2', $tmpdomain); // Remove part 'www.abc.' before 'mydomain.com'
	}
	if (empty($mode)) {
		$tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove first level domain (.com, .net, ...)
	}

	return $tmpdomain;
}

/**
 * Function root url from a long url
 * For example: https://www.abc.mydomain.com/dir/page.html return 'https://www.abc.mydomain.com'
 * For example: http://www.abc.mydomain.com/ return 'http://www.abc.mydomain.com'
 *
 * @param	string	  $url 				    Full URL.
 * @return	string						    Returns root url (scheme, when it is http or https, followed by the host part)
 */
function getRootURLFromURL($url)
{
	$prefix = '';
	$tmpurl = $url;
	$reg = null;
	if (preg_match('/^(https?:\/\/)/i', $tmpurl, $reg)) {
		$prefix = $reg[1];
	}
	$tmpurl = preg_replace('/^https?:\/\//i', '', $tmpurl); // Remove http(s)://
	$tmpurl = preg_replace('/\/.*$/i', '', $tmpurl); // Remove part after domain

	return $prefix.$tmpurl;
}

/**
 * Function to remove comments into HTML content
 *
 * @param	string	  $content 				Text content
 * @return	string						    Returns text without HTML comments
 */
function removeHtmlComment($content)
{
	// Non greedy match with the "s" modifier: removes each comment up to its first closing marker, including
	// multi-line comments, empty comments and comments that contain a hyphen.
	$content = preg_replace('/<!--.*?-->/s', '', $content);
	return $content;
}