编码/解码 URL

在 Go 中对整个 URL 进行编码和解码的推荐方法是什么?我知道 url.QueryEscape 和 url.QueryUnescape 这两个方法,但它们似乎并不完全是我想要的。具体来说,我在寻找类似 JavaScript 的 encodeURIComponent 和 decodeURIComponent 的方法。

67483 次浏览

摘自 MDN 关于 encodeURIComponent 的说明:

encodeURIComponent 会转义除下列字符之外的所有字符: 字母、十进制数字、 '-', '_', '.', '!', '~', '*', '\'', '(', ')'

Go 的 url.QueryEscape 实现(具体来说是私有函数 shouldEscape)会转义除下列字符之外的所有字符: 字母、十进制数字、 '-', '_', '.', '~'

与 JavaScript 不同,Go 的 QueryEscape() 会转义 '!', '*', '\'', '(', ')'。基本上,Go 的版本严格遵循 RFC 3986,而 JavaScript 的版本更宽松。同样引自 MDN:

如果希望更严格地遵循 RFC 3986(该标准保留了 !、'、(、) 和 *),即使这些字符没有正式的 URI 分隔用途,也可以安全地使用以下代码:

/**
 * Stricter variant of encodeURIComponent() that additionally percent-encodes
 * the characters ! ' ( ) and *, which encodeURIComponent() leaves untouched.
 *
 * The extra characters are encoded with an explicit callback instead of the
 * deprecated global escape(); the produced text is the same (%21, %27, %28,
 * %29, %2A).
 *
 * @param {string} str - The raw component to encode.
 * @returns {string} The percent-encoded component.
 */
function fixedEncodeURIComponent (str) {
  return encodeURIComponent(str).replace(/[!'()*]/g, function (ch) {
    // All five characters are ASCII, so a single two-digit hex code suffices.
    return "%" + ch.charCodeAt(0).toString(16).toUpperCase();
  });
}

您可以使用 net/url 包完成所有需要的 URL 编码。它没有为 URL 的各个部分分别提供单独的编码函数,您必须让它构造整个 URL。在研究了一番源代码之后,我认为它做得非常好,并且符合标准。

下面是一个例子(Playground 链接):

package main


import (
"fmt"
"net/url"
)


// main builds a URL piece by piece with net/url and prints its encoded form.
func main() {
	u, err := url.Parse("http://www.example.com")
	if err != nil {
		// Surface the real parse error instead of an opaque message.
		panic(err)
	}

	// The path is assigned through the Path field rather than being part of
	// the parsed string, so the literal '?' in it is percent-encoded by
	// String() instead of being taken as the start of the query.
	u.Path += "/some/path/or/other_with_funny_characters?_or_not/"

	parameters := url.Values{}
	parameters.Add("hello", "42")
	parameters.Add("hello", "54")
	parameters.Add("vegetable", "potato")
	u.RawQuery = parameters.Encode()

	fmt.Printf("Encoded URL is %q\n", u.String())
}

输出:

Encoded URL is "http://www.example.com/some/path/or/other_with_funny_characters%3F_or_not/?hello=42&hello=54&vegetable=potato"

为了模仿 Javascript 的 encodeURIComponent(),我创建了一个字符串助手函数。

示例: 将 "My String"转换为 "My%20String"

https://github.com/mrap/stringutil/blob/master/urlencode.go

import "net/url"


// UrlEncoded parses str as a URL reference with url.Parse and returns the
// re-serialized form produced by (*url.URL).String, e.g. "My String" becomes
// "My%20String".
//
// It returns an empty string and the parse error when str cannot be parsed.
func UrlEncoded(str string) (string, error) {
	parsed, parseErr := url.Parse(str)
	if parseErr == nil {
		return parsed.String(), nil
	}
	return "", parseErr
}

这里有一个 escape 和 unescape 的实现(摘自 Go 源码):

package main




import (
"fmt"
"strconv"
)




// encoding selects which part of a URL a string is being escaped or
// unescaped for.
type encoding int

// The available encoding modes, numbered consecutively starting at 1.
const (
	encodePath           encoding = 1
	encodeHost           encoding = 2
	encodeUserPassword   encoding = 3
	encodeQueryComponent encoding = 4
	encodeFragment       encoding = 5
)

// EscapeError carries the offending text of a malformed percent-escape.
type EscapeError string

// Error implements the error interface; the offending text is included in
// Go-quoted form.
func (e EscapeError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid URL escape " + quoted
}




// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}


// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Any byte that is not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}






// shouldEscape reports whether the byte c must be percent-escaped when it
// appears in the part of a URL selected by mode, following RFC 3986.
//
// Note that shouldEscape does not yet check all reserved characters
// correctly. See golang.org/issue/5684.
func shouldEscape(c byte, mode encoding) bool {
	// §2.3 Unreserved characters: letters, digits and the marks - _ . ~
	// are never escaped, whatever the mode.
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return false
	case c == '-', c == '_', c == '.', c == '~':
		return false
	}

	if mode == encodeHost {
		// §3.2.2 Host allows
		//  sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
		// as part of reg-name.
		// ':' is accepted because :port is treated as part of the host, and
		// '[' ']' because [ipv6]:port is treated as part of the host.
		switch c {
		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']':
			return false
		}
		// Every other byte in a host is escaped.
		return true
	}

	// §2.2 Reserved characters. Anything that is neither unreserved nor in
	// this set is always escaped.
	reserved := false
	switch c {
	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@':
		reserved = true
	}
	if !reserved {
		return true
	}

	// Different sections of the URL allow a few of the reserved characters
	// to appear unescaped.
	switch mode {
	case encodePath: // §3.3
		// The RFC allows : @ & = + $ but saves / ; , for assigning meaning
		// to individual path segments. The path is handled as a whole here,
		// so those are allowed too. That leaves only ? to escape.
		return c == '?'

	case encodeUserPassword: // §3.2.1
		// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in userinfo,
		// so only '@', '/', and '?' must be escaped. The parsing of userinfo
		// treats ':' as special, so it is escaped as well.
		return c == '@' || c == '/' || c == '?' || c == ':'

	case encodeQueryComponent: // §3.4
		// The RFC reserves (so we must escape) everything.
		return true

	case encodeFragment: // §4.1
		// The RFC text is silent but the grammar allows everything, so
		// escape nothing.
		return false
	}

	// Reserved character in any other mode: escape it.
	return true
}








// escape returns s with every byte for which shouldEscape(c, mode) is true
// replaced by an upper-case %XX sequence. In encodeQueryComponent mode a
// space is written as '+' instead. When no byte needs rewriting, s itself is
// returned.
func escape(s string, mode encoding) string {
	const upperhex = "0123456789ABCDEF"
	spaceAsPlus := mode == encodeQueryComponent

	// First pass: find out whether anything changes and how many bytes grow
	// from one to three characters.
	hexed, rewritten := 0, false
	for i := 0; i < len(s); i++ {
		c := s[i]
		if !shouldEscape(c, mode) {
			continue
		}
		rewritten = true
		if !(spaceAsPlus && c == ' ') {
			hexed++
		}
	}
	if !rewritten {
		return s
	}

	// Second pass: build the escaped output.
	out := make([]byte, 0, len(s)+2*hexed)
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case spaceAsPlus && c == ' ':
			out = append(out, '+')
		case shouldEscape(c, mode):
			out = append(out, '%', upperhex[c>>4], upperhex[c&15])
		default:
			out = append(out, c)
		}
	}
	return string(out)
}




// unescape decodes the %XX sequences in s. In encodeQueryComponent mode a
// '+' is additionally decoded to a space; in every other mode it is kept.
//
// A '%' that is not followed by two hexadecimal digits yields an EscapeError
// holding the offending text (at most three bytes, starting at the '%').
// When s contains nothing to decode it is returned unchanged.
func unescape(s string, mode encoding) (string, error) {
	plusIsSpace := mode == encodeQueryComponent

	// Validation pass: count the escapes and reject malformed ones.
	escapes := 0
	sawPlus := false
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case '%':
			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
				bad := s[i:]
				if len(bad) > 3 {
					bad = bad[:3]
				}
				return "", EscapeError(bad)
			}
			escapes++
			i += 2 // skip the two hex digits; the loop adds the third step
		case '+':
			sawPlus = plusIsSpace
		}
	}

	if escapes == 0 && !sawPlus {
		return s, nil
	}

	// Decoding pass: each escape shrinks the output by two bytes.
	out := make([]byte, 0, len(s)-2*escapes)
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '%':
			out = append(out, unhex(s[i+1])<<4|unhex(s[i+2]))
			i += 2
		case c == '+' && plusIsSpace:
			out = append(out, ' ')
		default:
			out = append(out, c)
		}
	}
	return string(out), nil
}




// EncodeUriComponent percent-encodes rawString the way JavaScript's
// encodeURIComponent does: every byte is written as an upper-case %XX
// sequence except ASCII letters, decimal digits and - _ . ! ~ * ' ( ).
//
// The string is processed byte by byte, so a multi-byte UTF-8 character
// becomes one %XX sequence per byte. Reserved characters such as '/', '?',
// '&', '=' and '+' are escaped, and a space becomes "%20" (never '+').
//
// The encoding is done here rather than via the generic per-section escaping
// modes because a fragment-style mode leaves the reserved characters
// $ & + , / : ; = ? @ untouched, which is not safe for a single component.
func EncodeUriComponent(rawString string) string {
	const upperhex = "0123456789ABCDEF"

	// keep reports whether encodeURIComponent leaves c unescaped.
	keep := func(c byte) bool {
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
			return true
		}
		switch c {
		case '-', '_', '.', '!', '~', '*', '\'', '(', ')':
			return true
		}
		return false
	}

	escaped := 0
	for i := 0; i < len(rawString); i++ {
		if !keep(rawString[i]) {
			escaped++
		}
	}
	if escaped == 0 {
		return rawString
	}

	// Each escaped byte grows from one to three characters.
	out := make([]byte, 0, len(rawString)+2*escaped)
	for i := 0; i < len(rawString); i++ {
		c := rawString[i]
		if keep(c) {
			out = append(out, c)
			continue
		}
		out = append(out, '%', upperhex[c>>4], upperhex[c&15])
	}
	return string(out)
}


// DecodeUriCompontent decodes the %XX sequences in encoded, mirroring
// JavaScript's decodeURIComponent.
//
// A '+' is kept as a literal plus sign: decodeURIComponent never maps '+' to
// a space (that is a form-encoding convention), so the query-component mode,
// which does perform that mapping, must not be used here. For unescape the
// mode only influences the treatment of '+', so encodePath is sufficient.
//
// It returns an error when encoded contains a '%' that is not followed by
// two hexadecimal digits.
func DecodeUriCompontent(encoded string) (string, error) {
	return unescape(encoded, encodePath)
}




// https://golang.org/src/net/url/url.go
// http://remove-line-numbers.ruurtjan.com/
// main encodes a sample string with EncodeUriComponent, prints it, decodes it
// again with DecodeUriCompontent and prints the result.
func main() {
	// http://www.url-encode-decode.com/
	origin := "äöüHel/lo world"
	encoded := EncodeUriComponent(origin)
	fmt.Println(encoded)

	decoded, err := DecodeUriCompontent(encoded)
	if err != nil {
		// Report the failure instead of printing an empty line.
		fmt.Println("decode failed:", err)
		return
	}
	fmt.Println(decoded)
}


// -------------------------------------------------------


/*
func UrlEncoded(str string) (string, error) {
u, err := url.Parse(str)
if err != nil {
return "", err
}
return u.String(), nil
}




// http://stackoverflow.com/questions/13820280/encode-decode-urls
// import "net/url"
func old_main() {
a,err := UrlEncoded("hello world")
if err != nil {
fmt.Println(err)
}
fmt.Println(a)


// https://gobyexample.com/url-parsing
//s := "postgres://user:pass@host.com:5432/path?k=v#f"
s := "postgres://user:pass@host.com:5432/path?k=vbla%23fooa#f"
u, err := url.Parse(s)
if err != nil {
panic(err)
}




fmt.Println(u.RawQuery)
fmt.Println(u.Fragment)
fmt.Println(u.String())
m, _ := url.ParseQuery(u.RawQuery)
fmt.Println(m)
fmt.Println(m["k"][0])


}
*/


// -------------------------------------------------------

这样吧:

template.URLQueryEscaper(path)

从 Go 1.8 开始,情况发生了变化。现在,除了原有的 QueryEscape 之外,我们还可以使用 PathEscape 来编码路径组件,以及与之对应的解码函数 PathUnescape。

如果有人想要得到与 JS encodeURIComponent 完全一致的结果,请试试我的函数,它不太优雅,但很好用。

https://gist.github.com/czyang/7ae30f4f625fee14cfc40c143e1b78bf

// Warning: use this code carefully and at your own risk.
package main


import (
"fmt"
"net/url"
"strings"
)
/*
After hours of searching, I could not find any method that produces exactly the same result as the JS encodeURIComponent function.
In my situation I needed to write a signing method that encodes user input exactly the same way as JS encodeURIComponent.
This function solved my problem.
*/
func main() {
params := url.Values{
"test_string": {"+!+'( )*-._~0-👿  👿9a-zA-Z 中文测试 test with ❤️ !@#$%^&&*()~<>?/.,;'[][]:\{\{}|{}|"},
}
urlEncode := params.Encode()
fmt.Println(urlEncode)
urlEncode = compatibleRFC3986Encode(urlEncode)
fmt.Println("RFC3986", urlEncode)
urlEncode = compatibleJSEncodeURIComponent(urlEncode)
fmt.Println("JS encodeURIComponent", urlEncode)
}


// compatibleRFC3986Encode returns str with every '+' replaced by "%20".
// It is meant to be applied to form-encoded text, where '+' stands for a
// space, to obtain the RFC 3986 spelling of a space.
func compatibleRFC3986Encode(str string) string {
	return strings.ReplaceAll(str, "+", "%20")
}


// jsURIComponentReplacer rewrites form-encoded text towards the output of JS
// encodeURIComponent: '+' becomes "%20", and the escapes of ! ' ( ) * are
// turned back into the literal characters.
var jsURIComponentReplacer = strings.NewReplacer(
	"+", "%20",
	"%21", "!",
	"%27", "'",
	"%28", "(",
	"%29", ")",
	"%2A", "*",
)

// compatibleJSEncodeURIComponent mimics JS encodeURIComponent on already
// form-encoded text: it replaces '+' with "%20" and un-escapes %21, %27, %28,
// %29 and %2A, which JS leaves as ! ' ( ) *.
//
// Only the upper-case spelling "%2A" is recognised.
func compatibleJSEncodeURIComponent(str string) string {
	return jsURIComponentReplacer.Replace(str)
}

希望这个能帮上忙

 // url encoded
// UrlEncodedISO parses str as a URL and returns only its query parameters,
// re-encoded with url.Values.Encode.
//
// It returns an empty string and the parse error when str cannot be parsed.
func UrlEncodedISO(str string) (string, error) {
	parsed, err := url.Parse(str)
	if err == nil {
		return parsed.Query().Encode(), nil
	}
	return "", err
}
 * encoded into %2A
 # encoded into %23
 % encoded into %25
 < encoded into %3C
 > encoded into %3E
 + encoded into %2B
 enter key (#13#10) is encoded into %0D%0A