我如何在javascript中解析URL到主机名和路径?

我有一个字符串:

var a = "http://example.com/aa/bb/"

然后把它解析成一个对象,使得

a.hostname == "example.com"

而且

a.pathname == "/aa/bb"
550354 次浏览

<a href="http://code.google.com/p/js-uri/" rel="noreferrer">js-uri</a>(可在 Google Code 上获取)接受一个字符串形式的URL,并从中解析出URI对象:

var some_uri = new URI("http://www.example.com/foo/bar");


alert(some_uri.authority); // www.example.com
alert(some_uri);           // http://www.example.com/foo/bar


var blah      = new URI("blah");
var blah_full = blah.resolve(some_uri);
alert(blah_full);         // http://www.example.com/foo/blah
/**
 * Hand a URL to a freshly created, detached anchor element and return that
 * element so callers can read URL parts (e.g. hostname, pathname) from it.
 *
 * @param {string} href - URL to assign to the anchor.
 * @returns {HTMLAnchorElement} The anchor element with its href set.
 */
var getLocation = function(href) {
  // Object.assign performs an ordinary property set, so the href setter runs.
  return Object.assign(document.createElement("a"), { href: href });
};
var l = getLocation("http://example.com/path");
console.debug(l.hostname)
>> "example.com"
console.debug(l.pathname)
>> "/path"

下面是我从https://gist.github.com/1847816中复制的一个版本,但重写了,以便于阅读和调试。将锚数据的值复制到另一个名为“result”的变量的目的是因为锚数据相当长,因此将有限数量的值复制到结果将有助于简化结果。

/**
 * See: https://gist.github.com/1847816
 * Parse a URI, returning a plain object similar to Location.
 * Usage: var uri = parseUri("hello?search#hash")
 *
 * The URL is assigned to a detached anchor element and a fixed set of its
 * properties is copied into a small result object, which is easier to
 * inspect than the anchor itself.
 *
 * @param {string} url - URL to parse.
 * @returns {Object} Object with protocol, hostname, host, pathname, port,
 *   search, hash and href copied from the anchor, plus requestUri
 *   (pathname + search) and a toString() that returns the anchor's href.
 */
function parseUri(url) {
  var anchor = document.createElement('a');
  anchor.href = url;

  var result = {};
  var keys = 'protocol hostname host pathname port search hash href'.split(' ');
  // Indexed loop on purpose: for...in over an array also visits enumerable
  // properties added to Array.prototype, which would add junk keys to result.
  for (var keyIndex = 0; keyIndex < keys.length; keyIndex++) {
    var currentKey = keys[keyIndex];
    result[currentKey] = anchor[currentKey];
  }

  result.toString = function() { return anchor.href; };
  result.requestUri = result.pathname + result.search;
  return result;
}

freddiefujiwara的答案很好,但我也需要在ie中支持相对url。我想出了以下解决方案:

/**
 * Resolve a URL (absolute or relative) through a detached anchor element.
 *
 * @param {string} href - URL to assign to the anchor.
 * @returns {HTMLAnchorElement} The anchor; callers read hostname, pathname,
 *   etc. from it.
 */
function getLocation(href) {
  var anchor = document.createElement("a");
  anchor.href = href;
  // IE leaves some link properties empty after a relative URL is assigned.
  // Reading .href back yields an absolute URL, and assigning that absolute
  // URL to the same anchor fills in the missing fields.
  var hostMissing = anchor.host == "";
  if (hostMissing) {
    var absoluteHref = anchor.href;
    anchor.href = absoluteHref;
  }
  return anchor;
}

现在使用它来获得所需的属性:

var a = getLocation('http://example.com/aa/bb/');
document.write(a.hostname);
document.write(a.pathname);

例子:

/**
 * Parse a URL by assigning it to a detached anchor element.
 *
 * @param {string} href - Absolute or relative URL.
 * @returns {HTMLAnchorElement} The anchor element carrying the parsed URL.
 */
function getLocation(href) {
  var link = document.createElement("a");
  link.href = href;
  if (link.host != "") {
    return link;
  }
  // IE does not populate every link property for a relative URL, but .href
  // reads back as an absolute URL; feeding it to the anchor again populates
  // the remaining fields.
  link.href = link.href;
  return link;
}
// Demo: parse one absolute URL and write its hostname and pathname to the page.
var urlToParse = 'http://example.com/aa/bb/';
var a = getLocation(urlToParse);
[
  'Absolute URL: ' + urlToParse,
  '<br />',
  'Hostname: ' + a.hostname,
  '<br />',
  'Pathname: ' + a.pathname
].forEach(function (chunk) {
  document.write(chunk);
});

你也可以使用Locutus项目(前php.js)中的parse_url()函数。

代码:

parse_url('http://username:password@hostname/path?arg=value#anchor');

结果:

{
scheme: 'http',
host: 'hostname',
user: 'username',
pass: 'password',
path: '/path',
query: 'arg=value',
fragment: 'anchor'
}

找到这里:https://gist.github.com/jlong/2428561

var parser = document.createElement('a');
parser.href = "http://example.com:3000/pathname/?search=test#hash";


parser.protocol; // => "http:"
parser.host;     // => "example.com:3000"
parser.hostname; // => "example.com"
parser.port;     // => "3000"
parser.pathname; // => "/pathname/"
parser.hash;     // => "#hash"
parser.search;   // => "?search=test"
parser.origin;   // => "http://example.com:3000"

AngularJS的方法在这里:http://jsfiddle.net/PT5BG/4/

<!DOCTYPE html>
<html>
<head>
<title>Parse URL using AngularJS</title>
</head>
<body ng-app ng-controller="AppCtrl" ng-init="init()">


<h3>Parse URL using AngularJS</h3>


url: <input type="text" ng-model="url" value="" style="width:780px;">


<ul>
<li>href = {{parser.href}}</li>
<li>protocol = {{parser.protocol}}</li>
<li>host = {{parser.host}}</li>
<li>hostname = {{parser.hostname}}</li>
<li>port = {{parser.port}}</li>
<li>pathname = {{parser.pathname}}</li>
<li>hash = {{parser.hash}}</li>
<li>search = {{parser.search}}</li>
</ul>


<script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.0.6/angular.min.js"></script>


<script>
/**
 * Controller for the demo page: keeps an anchor element on the scope in sync
 * with the `url` scope value so the template can display the anchor's parts.
 *
 * @param {Object} $scope - Scope object; must provide $watch(expression, listener).
 */
function AppCtrl($scope) {
  // Copy the current url value onto the parser anchor.
  function syncParserHref() {
    $scope.parser.href = $scope.url;
  }

  // Create the parser anchor and seed url with the page location.
  function init() {
    $scope.parser = document.createElement('a');
    $scope.url = window.location;
  }

  $scope.$watch('url', syncParserHref);
  $scope.init = init;
}
</script>


</body>
</html>

对于那些正在寻找在IE, Firefox和Chrome中工作的现代解决方案的人:

这些使用超链接元素的解决方案都不会在chrome中起同样的作用。如果你传递一个无效(或空白)的url给chrome,它将总是返回脚本被调用的主机。所以在IE中你会得到空白,而在Chrome中你会得到localhost(或其他什么)。

如果你是想检查引用来源(referrer),这种行为会产生误导。为了处理这个问题,你需要确认返回的主机名确实出现在原始url中:

    /**
     * Parse the domain/host from a given URL using an anchor element.
     *
     * The parsed hostname is returned only when it actually occurs in the
     * input, because (per the surrounding discussion) Chrome falls back to
     * the host of the calling page for invalid or blank URLs.
     *
     * @param {string} url - URL to inspect.
     * @returns {string} The hostname reported by the anchor, or '' when that
     *   hostname does not appear in the input.
     */
    function getHostNameFromUrl(url) {
        var a = document.createElement("a");
        a.href = url;

        var hostname = a.hostname;
        // Compare case-insensitively: URL parsers lower-case the host, so an
        // input such as "HTTP://EXAMPLE.COM" would otherwise be rejected.
        // String(url) keeps null/undefined input from throwing.
        var haystack = String(url).toLowerCase();
        return haystack.indexOf(hostname.toLowerCase()) !== -1 ? hostname : '';
    }

下面是一个使用regexp的简单函数,它模仿a标记行为。

优点

  • 可预测的行为(无跨浏览器问题)
  • 不需要DOM
  • 它真的很短。

缺点

  • regexp有点难读

-

/**
 * Split an http/https URL into Location-like parts with a single regular
 * expression (no DOM required).
 *
 * @param {string} href - URL to parse.
 * @returns {?Object} Object with href, protocol, host, hostname, port,
 *   pathname, search and hash, or null when the pattern does not match.
 *   port is undefined when the URL has no explicit port.
 */
function getLocation(href) {
  var parts = href.match(/^(https?\:)\/\/(([^:\/?#]*)(?:\:([0-9]+))?)([\/]{0,1}[^?#]*)(\?[^#]*|)(#.*|)$/);
  if (!parts) {
    return parts;
  }
  var [, protocol, host, hostname, port, pathname, search, hash] = parts;
  return {
    href: href,
    protocol: protocol,
    host: host,
    hostname: hostname,
    port: port,
    pathname: pathname,
    search: search,
    hash: hash
  };
}

-

getLocation("http://example.com/");
/*
{
"protocol": "http:",
"host": "example.com",
"hostname": "example.com",
"port": undefined,
"pathname": "/",
"search": "",
"hash": ""
}
*/


getLocation("http://example.com:3000/pathname/?search=test#hash");
/*
{
"protocol": "http:",
"host": "example.com:3000",
"hostname": "example.com",
"port": "3000",
"pathname": "/pathname/",
"search": "?search=test",
"hash": "#hash"
}
*/

编辑:

下面是正则表达式的分解

// The same pattern, assembled from one source fragment per URL component.
var reURLInformation = new RegExp(
  '^(https?:)//' +                // protocol
  '(([^:/?#]*)(?::([0-9]+))?)' +  // host (hostname and port)
  '(/{0,1}[^?#]*)' +              // pathname
  '(\\?[^#]*|)' +                 // search
  '(#.*|)$'                       // hash
);
var match = href.match(reURLInformation);

现代方式:

new URL("http://example.com/aa/bb/")

返回一个对象,它具有 hostname、pathname 以及其他一些属性。

第一个参数是相对或绝对URL;如果它是相对的,那么你需要指定第二个参数(基本URL)。例如,对于相对于当前页面的URL:

new URL("/aa/bb/", location)

除了浏览器之外,这个API从Node.js v7开始也可以使用,通过 require('url').URL 获取。

使用模块模式的简单而健壮的解决方案。这包括对IE的修复,其中pathname并不总是有一个前导正斜杠(/)。

我已经创建了一个要点和一个JSFiddle,它提供了一个更动态的解析器。我建议你检查一下并提供反馈。

/**
 * URLParser: constructor that parses URLs through a private anchor element
 * and mirrors the URL parts onto the instance.
 *
 * Instance fields after parse(): protocol, hostname, host, pathname, port,
 * search, hash (first character stripped), href and requestUri.
 */
var URLParser = (function (document) {
  var PROPS = 'protocol hostname host pathname port search hash href'.split(' ');

  var Parser = function (url) {
    this.aEl = document.createElement('a');
    this.parse(url);
  };

  /**
   * Parse `url` and refresh every mirrored field on this instance.
   * @param {string} url - URL to parse.
   */
  Parser.prototype.parse = function (url) {
    var anchor = this.aEl;
    anchor.href = url;
    // Re-assign the href when host came back empty so the anchor
    // re-resolves it (the IE relative-URL workaround).
    if (anchor.host == "") {
      anchor.href = anchor.href;
    }
    for (var i = 0; i < PROPS.length; i++) {
      var name = PROPS[i];
      var value = anchor[name];
      // hash is stored without its first character.
      this[name] = name === 'hash' ? value.substr(1) : value;
    }
    // IE fix: pathname does not always carry a leading forward slash.
    if (this.pathname.charAt(0) !== '/') {
      this.pathname = '/' + this.pathname;
    }
    this.requestUri = this.pathname + this.search;
  };

  /**
   * @returns {Object} Plain-object copy of the mirrored fields plus requestUri.
   */
  Parser.prototype.toObj = function () {
    var snapshot = {};
    for (var i = 0; i < PROPS.length; i++) {
      snapshot[PROPS[i]] = this[PROPS[i]];
    }
    snapshot.requestUri = this.requestUri;
    return snapshot;
  };

  /**
   * @returns {string} The mirrored href.
   */
  Parser.prototype.toString = function () {
    return this.href;
  };

  return Parser;
})(document);

演示

.
/**
 * URLParser (demo copy): parses URLs through a private anchor element and
 * mirrors protocol, hostname, host, pathname, port, search, hash (first
 * character stripped) and href onto the instance, plus requestUri.
 */
var URLParser = (function(document) {
  var PROPS = 'protocol hostname host pathname port search hash href'.split(' ');

  var Parser = function(url) {
    this.aEl = document.createElement('a');
    this.parse(url);
  };

  // Parse `url` and refresh every mirrored field on this instance.
  Parser.prototype.parse = function(url) {
    var anchor = this.aEl;
    anchor.href = url;
    // Re-assign the href when host came back empty so it gets re-resolved.
    if (anchor.host == "") {
      anchor.href = anchor.href;
    }
    for (var i = 0; i < PROPS.length; i++) {
      var name = PROPS[i];
      var value = anchor[name];
      // hash is stored without its first character.
      this[name] = name === 'hash' ? value.substr(1) : value;
    }
    // Guarantee a leading slash on pathname.
    if (this.pathname.charAt(0) !== '/') {
      this.pathname = '/' + this.pathname;
    }
    this.requestUri = this.pathname + this.search;
  };

  // Return a plain-object copy of the mirrored fields plus requestUri.
  Parser.prototype.toObj = function() {
    var snapshot = {};
    for (var i = 0; i < PROPS.length; i++) {
      snapshot[PROPS[i]] = this[PROPS[i]];
    }
    snapshot.requestUri = this.requestUri;
    return snapshot;
  };

  // String form is the mirrored href.
  Parser.prototype.toString = function() {
    return this.href;
  };

  return Parser;
})(document);


/* Main */
// Parse each sample URL with one shared parser and print its fields as JSON.
var out = document.getElementById('out');
var parser = new URLParser();
var urls = [
  'https://www.example.org:5887/foo/bar?a=1&b=2#section-1',
  'ftp://www.files.com:22/folder?id=7'
];
urls.forEach(function(url) {
  parser.parse(url);
  var snapshot = parser.toObj();
  var json = JSON.stringify(snapshot, undefined, ' ');
  // 0 as bgColor is falsy, so println falls back to its default background.
  println(out, json, 0, '#0000A7');
});


/* Utility functions */
/**
 * Append `text` to `el` inside a coloured <span>.
 *
 * @param {Element} el - Container that receives the span.
 * @param {string} text - Text to display; it is inserted as plain text.
 * @param {string} [bgColor] - Background colour; any falsy value selects '#FFFFFF'.
 * @param {string} [fgColor] - Text colour; any falsy value selects '#000000'.
 */
function print(el, text, bgColor, fgColor) {
  var span = document.createElement('span');
  // textContent instead of innerHTML: the text is data (e.g. JSON built from
  // a URL), so "<", ">" and "&" must be shown literally, not parsed as markup.
  span.textContent = text;
  // "||" is deliberate: callers pass 0 to mean "use the default colour".
  span.style['backgroundColor'] = bgColor || '#FFFFFF';
  span.style['color'] = fgColor || '#000000';
  el.appendChild(span);
}

/**
 * Same as print(), followed by a <br> element.
 *
 * @param {Element} el - Container that receives the span and the line break.
 * @param {string} text - Text to display.
 * @param {string} [bgColor] - Background colour, forwarded to print().
 * @param {string} [fgColor] - Text colour, forwarded to print().
 */
function println(el, text, bgColor, fgColor) {
  print(el, text, bgColor, fgColor);
  el.appendChild(document.createElement('br'));
}
body {
background: #444;
}
span {
background-color: #fff;
border: thin solid black;
display: inline-block;
}
#out {
display: block;
font-family: Consolas, Menlo, Monaco, Lucida Console, Liberation Mono, DejaVu Sans Mono, Bitstream Vera Sans Mono, Courier New, monospace, serif;
font-size: 12px;
white-space: pre;
}
<div id="out"></div>

Output

{
"protocol": "https:",
"hostname": "www.example.org",
"host": "www.example.org:5887",
"pathname": "/foo/bar",
"port": "5887",
"search": "?a=1&b=2",
"hash": "section-1",
"href": "https://www.example.org:5887/foo/bar?a=1&b=2#section-1",
"requestUri": "/foo/bar?a=1&b=2"
}
{
"protocol": "ftp:",
"hostname": "www.files.com",
"host": "www.files.com:22",
"pathname": "/folder",
"port": "22",
"search": "?id=7",
"hash": "",
"href": "ftp://www.files.com:22/folder?id=7",
"requestUri": "/folder?id=7"
}

别再白费力气了。使用https://github.com/medialize/URI.js/

var uri = new URI("http://example.org:80/foo/hello.html");
// get host
uri.host(); // returns string "example.org:80"
// set host
uri.host("example.org:80");
var loc = window.location;  // => "http://example.com:3000/pathname/?search=test#hash"

返回当前页面的URL。

如果你想传递你自己的字符串作为url (不能在IE11中工作):

var loc = new URL("http://example.com:3000/pathname/?search=test#hash")

然后你可以这样解析它:

loc.protocol; // => "http:"
loc.host;     // => "example.com:3000"
loc.hostname; // => "example.com"
loc.port;     // => "3000"
loc.pathname; // => "/pathname/"
loc.hash;     // => "#hash"
loc.search;   // => "?search=test"

只需使用url.js库(用于web和node.js)。

https://github.com/websanova/js-url

url: http://example.com?param=test#param=again


url('?param'); // test
url('#param'); // again
url('protocol'); // http
url('port'); // 80
url('domain'); // example.com
url('tld'); // com


etc...

那么简单的正则表达式呢?

// Split a URL into host and path with one regular expression: an optional
// "scheme://" prefix is skipped, group 1 is everything up to the first "/",
// and group 2 is the remainder of the string.
// Variables are declared explicitly; assigning to undeclared names creates
// implicit globals and throws a ReferenceError in strict mode / ES modules.
var url = "http://www.example.com/path/to/somwhere";
var urlParts = /^(?:\w+\:\/\/)?([^\/]+)(.*)$/.exec(url);
var hostname = urlParts[1]; // www.example.com
var path = urlParts[2]; // /path/to/somwhere

今天我遇到了这个问题,我发现:URL - MDN Web api

var url = new URL("http://test.example.com/dir/subdir/file.html#hash");

这返回:

{ hash:"#hash", host:"test.example.com", hostname:"test.example.com", href:"http://test.example.com/dir/subdir/file.html#hash", origin:"http://test.example.com", password:"", pathname:"/dir/subdir/file.html", port:"", protocol:"http:", search: "", username: "" }

希望我的第一篇文章能帮助到你!

跨浏览器的URL解析,并规避了IE 6、7、8和9中的相对路径问题:

/**
 * Constructor: parse `url` through an anchor element and copy the URL parts
 * onto the new object (host, hostname, hash, href, port, protocol, search,
 * pathname), working around relative-URL quirks of old IE versions.
 *
 * @constructor
 * @param {string} url - Absolute or relative URL.
 */
function ParsedUrl(url) {
  var parser = document.createElement("a");
  parser.href = url;

  // IE 8 and 9 dont load the attributes "protocol" and "host" in case the source URL
  // is just a pathname, that is, "/example" and not "http://domain.com/example".
  parser.href = parser.href;

  // IE 7 and 6 wont load "protocol" and "host" even with the above workaround,
  // so we take the protocol/host from window.location and place them manually
  if (parser.host === "") {
    var newProtocolAndHost = window.location.protocol + "//" + window.location.host;
    // A root-relative URL starts with "/" at index 0 and only needs the
    // protocol and host prepended. (Testing index 1 sent "/example" down the
    // folder-relative branch and produced ".../folder//example".)
    if (url.charAt(0) === "/") {
      parser.href = newProtocolAndHost + url;
    } else {
      // the regex gets everything up to the last "/"
      // /path/takesEverythingUpToAndIncludingTheLastForwardSlash/thisIsIgnored
      // "/" is inserted before because IE takes it of from pathname
      var currentFolder = ("/" + parser.pathname).match(/.*\//)[0];
      parser.href = newProtocolAndHost + currentFolder + url;
    }
  }

  // copies all the properties to this object
  var properties = ['host', 'hostname', 'hash', 'href', 'port', 'protocol', 'search'];
  for (var i = 0, n = properties.length; i < n; i++) {
    this[properties[i]] = parser[properties[i]];
  }

  // pathname is special because IE takes the "/" of the starting of pathname
  this.pathname = (parser.pathname.charAt(0) !== "/" ? "/" : "") + parser.pathname;
}

使用(演示JSFiddle在这里):

var myUrl = new ParsedUrl("http://www.example.com:8080/path?query=123#fragment");

结果:

{
hash: "#fragment"
host: "www.example.com:8080"
hostname: "www.example.com"
href: "http://www.example.com:8080/path?query=123#fragment"
pathname: "/path"
port: "8080"
protocol: "http:"
search: "?query=123"
}
/**
 * Parse a URL string into a Location-like object using a single regular
 * expression (no DOM required).
 *
 * @param {string} url - URL to parse.
 * @returns {Object} Object with hash, host, hostname, href, origin, pathname,
 *   port, protocol, search, username and password; parts that are absent
 *   from the input are empty strings.
 */
function parseUrl(url) {
  var m = url.match(/^((?:([^:\/?#]+:)(?:\/\/))?((?:([^\/?#:]*):([^\/?#:]*)@)?([^\/?#:]*)(?::([^\/?#:]*))?))?([^?#]*)(\?[^#]*)?(#.*)?$/);
  var authority = m[3] || "";              // username:password@localhost:257
  var hostname = m[6] || "";               // localhost
  var port = m[7] || "";                   // 257
  var r = {
    hash: m[10] || "",                     // #asd
    // host is hostname[:port] only; the raw authority (m[3]) also carries
    // "username:password@", which does not belong in host.
    host: hostname + (port ? ":" + port : ""), // localhost:257
    hostname: hostname,                    // localhost
    href: m[0] || "",                      // http://username:password@localhost:257/deploy/?asd=asd#asd
    origin: m[1] || "",                    // http://username:password@localhost:257
    pathname: m[8] || (m[1] ? "/" : ""),   // /deploy/
    port: port,                            // 257
    protocol: m[2] || "",                  // http:
    search: m[9] || "",                    // ?asd=asd
    username: m[4] || "",                  // username
    password: m[5] || ""                   // password
  };
  // A two-character protocol (one character plus ":") is rewritten as a
  // file:/// URL - presumably a drive letter such as "c:"; confirm with callers.
  if (r.protocol.length == 2) {
    r.protocol = "file:///" + r.protocol.toUpperCase();
    r.origin = r.protocol + "//" + authority;
  }
  // href is rebuilt from the (possibly rewritten) parts.
  r.href = r.origin + r.pathname + r.search + r.hash;
  return r;
}
parseUrl("http://username:password@localhost:257/deploy/?asd=asd#asd");

它既适用于绝对url,也适用于相对url

为此使用https://www.npmjs.com/package/uri-parse-lib

var t = parserURI("http://user:pass@example.com:8080/directory/file.ext?query=1&next=4&sed=5#anchor");

为什么不用呢?

        /**
         * Parse a URL with an anchor element and return its parts as a
         * plain object.
         *
         * @param {string} url_str - URL to parse, e.g.
         *   "http://example.com:3000/pathname/?search=test#hash".
         * @returns {Object} Object with protocol, hostname, port, pathname,
         *   search, hash and host read from the anchor.
         */
        $scope.get_location = function (url_str) {
            var anchor = document.createElement('a');
            anchor.href = url_str;
            var info = {};
            // Same fields, in the same order, as the returned object exposes.
            ['protocol', 'hostname', 'port', 'pathname', 'search', 'hash', 'host'].forEach(function (field) {
                info[field] = anchor[field];
            });
            return info;
        };
alert( JSON.stringify( $scope.get_location("http://localhost:257/index.php/deploy/?asd=asd#asd"),null,4 ) );

第一个答案的简单破解

/**
 * Return an anchor element whose href is `href`; with no argument the
 * current page URL (window.location.href) is used.
 *
 * @param {string} [href=window.location.href] - URL to assign to the anchor.
 * @returns {HTMLAnchorElement} The anchor element with its href set.
 */
var getLocation = function(href=window.location.href) {
  var anchor = document.createElement("a");
  return Object.assign(anchor, { href: href });
};

这可以使用,即使没有参数,以找出当前的主机名 getLocation()。Hostname 将给出当前主机名

扩展acdcjunior的解决方案,添加了"searchParam"函数。它模拟URL对象,并通过"searchParam"解析查询字符串。
适用于IE 6, 7, 8 9, 10, 11

__abc0 - (__abc1)

// USAGE:
var myUrl = new ParsedUrl("http://www.example.com/path?var1=123&var2=abc#fragment");
console.log(myUrl);
console.log(myUrl.searchParam('var1'));
console.log(myUrl.searchParam('var2'));

__abc0 - (__abc1)

{
hash: "#fragment",
host: "www.example.com:8080",
hostname: "www.example.com",
href: "http://www.example.com:8080/path?var1=123&amp;var2=abc#fragment",
pathname: "/path",
port: "80",
protocol: "http:",
search: "?var1=123&amp;var2=abc"
}


"123"
"abc"

__abc0 - (__abc1)

/**
 * Constructor: parse `url` through an anchor element and copy the URL parts
 * onto the new object (host, hostname, hash, href, port, protocol, search,
 * pathname), working around relative-URL quirks of old IE versions. Adds a
 * searchParam(name) method for reading query-string values.
 *
 * @constructor
 * @param {string} url - Absolute or relative URL.
 */
function ParsedUrl(url) {
  var parser = document.createElement("a");
  parser.href = url;

  // IE 8 and 9 dont load the attributes "protocol" and "host" in case the source URL
  // is just a pathname, that is, "/example" and not "http://www.example.com/example".
  parser.href = parser.href;

  // IE 7 and 6 wont load "protocol" and "host" even with the above workaround,
  // so we take the protocol/host from window.location and place them manually
  if (parser.host === "") {
    var newProtocolAndHost = window.location.protocol + "//" + window.location.host;
    // A root-relative URL starts with "/" at index 0 and only needs the
    // protocol and host prepended. (Testing index 1 sent "/example" down the
    // folder-relative branch and produced ".../folder//example".)
    if (url.charAt(0) === "/") {
      parser.href = newProtocolAndHost + url;
    } else {
      // the regex gets everything up to the last "/"
      // /path/takesEverythingUpToAndIncludingTheLastForwardSlash/thisIsIgnored
      // "/" is inserted before because IE takes it of from pathname
      var currentFolder = ("/" + parser.pathname).match(/.*\//)[0];
      parser.href = newProtocolAndHost + currentFolder + url;
    }
  }

  // copies all the properties to this object
  var properties = ['host', 'hostname', 'hash', 'href', 'port', 'protocol', 'search'];
  for (var i = 0, n = properties.length; i < n; i++) {
    this[properties[i]] = parser[properties[i]];
  }

  // pathname is special because IE takes the "/" of the starting of pathname
  this.pathname = (parser.pathname.charAt(0) !== "/" ? "/" : "") + parser.pathname;

  /**
   * Look up a query-string parameter by name.
   *
   * @param {string} variable - Parameter name (compared after URI-decoding).
   * @returns {string} Decoded value of the first matching parameter; '' when
   *   the parameter has no value or is absent (absence is also logged).
   */
  this.searchParam = function(variable) {
    var query = (this.search.indexOf('?') === 0) ? this.search.substr(1) : this.search;
    var vars = query.split('&');
    for (var i = 0; i < vars.length; i++) {
      // Split on the FIRST "=" only so values that contain "=" stay intact.
      var separatorAt = vars[i].indexOf('=');
      var rawName = separatorAt === -1 ? vars[i] : vars[i].slice(0, separatorAt);
      if (decodeURIComponent(rawName) == variable) {
        // A key without "=" has no value: return '' rather than "undefined".
        return separatorAt === -1 ? '' : decodeURIComponent(vars[i].slice(separatorAt + 1));
      }
    }
    console.log('Query variable %s not found', variable);
    return '';
  };
}

试试这个:

/**
 * Extract the path portion of a URL string.
 *
 * A sentinel path is appended first so that URLs without any path still give
 * the pattern something to capture; the sentinel is removed from the result.
 *
 * @param {string} str - URL to inspect.
 * @returns {string} The path (up to "#", "?" or the end of the string), or
 *   the sentinel "/FakPath" when the pattern does not match at all.
 */
function getUrlPath(str){
  var SENTINEL = "/FakPath";
  var pathPattern = /.+?\:\/\/.+?(\/.+?)(?:#|\?|$)/;
  var found = pathPattern.exec(str + SENTINEL);
  if (!found) {
    return SENTINEL;
  }
  return found[1].replace(SENTINEL, "");
}


var myurl = "https://stackoverflow.com/questions/736513/";
const path = getUrlPath(myurl);


console.log(   path     );
//output :  /questions/736513/

这不会解析查询和散列,但除此之外它工作得很好。

/**
 * Split a URL-like string into scheme, host, port and pathname with one
 * regular expression. Query and hash are not separated from the pathname.
 *
 * @param {string} url - Input such as "https://host:8800/path" or "host/path".
 * @returns {{scheme: (string|undefined), host: (string|undefined),
 *   port: (string|undefined), pathname: string}} Captured parts; a part that
 *   did not take part in the match is undefined.
 */
const getURIParts = (url) => {
  const found = url.match(/^(\w+?:\/\/)?([\w-\.]+(?=\/?))?:?(\d*)?([^:]*)/)
  if (!found) {
    return { scheme: undefined, host: '', port: undefined, pathname: '' }
  }
  const [, scheme, host, port, pathname] = found
  return { scheme, host, port, pathname }
}


console.log(getURIParts(""))
console.log(getURIParts("http://localhost/bla"))
console.log(getURIParts("https://api.spotify.com/"))
console.log(getURIParts("https://api.spotify.com"))
console.log(getURIParts("wss://wss.slack.com/link/?ticket=1234-5678"))
console.log(getURIParts("localhost"))
console.log(getURIParts("localhost/bla"))
console.log(getURIParts("localhost/"))
console.log(getURIParts("api.spotify.com/bla/two"))
console.log(getURIParts("api.spotify.com:8000/bla/two"))
console.log(getURIParts("https://api.spotify.com:8800/"))
console.log(getURIParts("/mp3-preview/f504e6b8e037771318656394f532dede4f9bcaea"))

当然,在2016年以后,正确答案是使用 <strong>URL API</strong>。
对于当前页面的URL,使用 window.location;
对于 <a href="..."> 元素,使用 HTMLAnchorElement API。

也支持旧浏览器,使用polyfill:

<script crossorigin="anonymous" src="https://polyfill.io/v3/polyfill.min.js?features=URL"></script>

但就像其他选项一样,它可以通过正则表达式模式简单而准确地处理:

function URIinfo(s) {
s=s.match(/^(([^/]*?:)\/*((?:([^:]+):([^@]+)@)?([^/:]{2,}|\[[\w:]+])(:\d*)?(?=\/|$))?)?((.*?\/)?(([^/]*?)(\.[^/.]+?)?))(\?.*?)?(#.*)?$/);
return {origin:s[1],protocol:s[2],host:s[3],username:s[4],password:s[5],hostname:s[6],port:s[7],path:s[8],folders:s[9],file:s[10],filename:s[11],fileext:s[12],search:s[13],hash:s[14]};
}


var sample='http://user:password@my.site.com:8080/onefolder/folder.name/file.min.js?query=http://my.site.com:8080/file.exe#hash-root:/file/1.txt';


console.log (URIinfo(sample));
/*
{
"origin": "http://user:password@my.site.com:8080",
"protocol": "http:",
"host": "user:password@my.site.com:8080",
"username": "user",
"password": "password",
"hostname": "my.site.com",
"port": ":8080",
"path": "/onefolder/folder.name/file.min.js",
"folders": "/onefolder/folder.name/",
"file": "file.min.js",
"filename": "file.min",
"fileext": ".js",
"search": "?query=http://my.site.com:8080/file.exe",
"hash": "#hash-root:/file/1.txt"
}
*/

使用<strong>正则表达式</strong>

任何形式的

  • 绝对/相对路径
  • IPv4 / IPv6
  • 网络协议/本地文件
  • 查询/散列

并返回所有URL选项,但不包括searchParams

(+)也会像PHP pathInfo一样返回文件信息

怎么样?

'https://stackoverflow.com/questions/736513/how-do-i-parse-a-url-into-hostname-and-path-in-javascript'.split('//').pop().split('/')[0]

结果:

'stackoverflow.com'