我有一个Go1.5.1进程/应用程序。当我在那个进程上运行/usr/sbin/lsof -p
时,我看到很多"无法识别协议"。
monitor_ 13105 root 101u sock 0,6 0t0 16960100 can't identify protocol
monitor_ 13105 root 102u sock 0,6 0t0 21552427 can't identify protocol
monitor_ 13105 root 103u sock 0,6 0t0 17565091 can't identify protocol
monitor_ 13105 root 104u sock 0,6 0t0 18476870 can't identify protocol
proc状态/限制/fd
[root@Monitor_q ~]# cat /proc/13105/status
Name: monitor_client
State: S (sleeping)
Tgid: 13105
Pid: 13105
PPid: 13104
TracerPid: 0
Uid: 0 0 0 0
Gid: 0 0 0 0
Utrace: 0
FDSize: 16384
Groups:
...
[root@Monitor_q ~]# cat /proc/13105/limits
Limit Soft Limit Hard Limit Units
Max cpu time unlimited unlimited seconds
Max file size unlimited unlimited bytes
Max data size unlimited unlimited bytes
Max stack size 10485760 unlimited bytes
Max core file size 0 unlimited bytes
Max resident set unlimited unlimited bytes
Max processes 3870 3870 processes
Max open files 9999 9999 files
Max locked memory 65536 65536 bytes
Max address space unlimited unlimited bytes
Max file locks unlimited unlimited locks
Max pending signals 3870 3870 signals
Max msgqueue size 819200 819200 bytes
Max nice priority 0 0
Max realtime priority 0 0
Max realtime timeout unlimited unlimited us
[root@Monitor_q ~]# ll /proc/13105/fd/
lrwx------ 1 root root 64 Dec 7 00:15 8382 -> socket:[52023221]
lrwx------ 1 root root 64 Dec 7 00:15 8383 -> socket:[51186627]
lrwx------ 1 root root 64 Dec 7 00:15 8384 -> socket:[51864232]
lrwx------ 1 root root 64 Dec 7 00:15 8385 -> socket:[52435453]
lrwx------ 1 root root 64 Dec 7 00:15 8386 -> socket:[51596071]
lrwx------ 1 root root 64 Dec 7 00:15 8387 -> socket:[52767667]
lrwx------ 1 root root 64 Dec 7 00:15 8388 -> socket:[52090632]
lrwx------ 1 root root 64 Dec 7 00:15 8389 -> socket:[51739068]
lrwx------ 1 root root 64 Dec 7 00:15 839 -> socket:[22963529]
lrwx------ 1 root root 64 Dec 7 00:15 8390 -> socket:[52023223]
lrwx------ 1 root root 64 Dec 7 00:15 8391 -> socket:[52560389]
lrwx------ 1 root root 64 Dec 7 00:15 8392 -> socket:[52402565]
...
但在CCD_ 2中没有类似的输出。
这些插座是什么?我怎样才能知道它们的作用?
monitor_client.go
package main
import (
"crypto/tls"
"encoding/json"
"fmt"
"log"
"net"
"net/http"
nurl "net/url"
"strconv"
"strings"
"syscall"
"time"
)
type Result struct {
Error string `json:"error"`
HttpStatus int `json:"http_status"`
Stime time.Duration `json:"http_time"`
}
//http://stackoverflow.com/questions/20990332/golang-http-timeout-and-goroutines-accumulation
//http://3.3.3.3/http?host=3.2.4.2&servername=a.test&path=/&port=33&timeout=5&scheme=http
func MonitorHttp(w http.ResponseWriter, r *http.Request) {
var host, servername, path, port, scheme string
var timeout int
u, err := nurl.Parse(r.RequestURI)
if err != nil {
log.Fatal(err)
return
}
if host = u.Query().Get("host"); host == "" {
host = "127.0.0.0"
}
if servername = u.Query().Get("servername"); servername == "" {
servername = "localhost"
}
if path = u.Query().Get("path"); path == "" {
path = "/"
}
if port = u.Query().Get("port"); port == "" {
port = "80"
}
if scheme = u.Query().Get("scheme"); scheme == "" {
scheme = "http"
}
if timeout, _ = strconv.Atoi(u.Query().Get("timeout")); timeout == 0 {
timeout = 5
}
//log.Printf("(host)=%s (servername)=%s (path)=%s (port)=%s (timeout)=%d", host, servername, path, port, timeout)
w.Header().Set("Content-Type", "application/json")
res := httptool(host, port, servername, scheme, path, timeout)
result, _ := json.Marshal(res)
fmt.Fprintf(w, "%s", result)
}
func httptool(ip, port, servername, scheme, path string, timeout int) Result {
var result Result
startTime := time.Now()
host := ip + ":" + port
transport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
DisableKeepAlives: true,
}
dialer := net.Dialer{
Timeout: time.Duration(timeout) * time.Second,
KeepAlive: 0 * time.Second,
}
transport.Dial = func(network, address string) (net.Conn, error) {
return dialer.Dial(network, address)
}
client := &http.Client{
Transport: transport,
}
rawquery := ""
url := fmt.Sprintf("%s://%s%s%s", scheme, host, path, rawquery)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
result.HttpStatus = -1
errs := strings.Split(err.Error(), ": ")
result.Error = errs[len(errs)-1]
result.Stime = time.Now().Sub(startTime) / time.Millisecond
return result
}
req.Header.Set("User-Agent", "monitor worker")
req.Header.Set("Connection", "close")
req.Host = servername
resp, err := client.Do(req)
//https://github.com/Basiclytics/neverdown/blob/master/check.go
if err != nil {
nerr, ok := err.(*nurl.Error)
if ok {
switch cerr := nerr.Err.(type) {
case *net.OpError:
switch cerr.Err.(type) {
case *net.DNSError:
errs := strings.Split(cerr.Error(), ": ")
result.Error = "dns: " + errs[len(errs)-1]
default:
errs := strings.Split(cerr.Error(), ": ")
result.Error = "server: " + errs[len(errs)-1]
}
default:
switch nerr.Err.Error() {
case "net/http: request canceled while waiting for connection":
errs := strings.Split(cerr.Error(), ": ")
result.Error = "timeout: " + errs[len(errs)-1]
default:
errs := strings.Split(cerr.Error(), ": ")
result.Error = "unknown: " + errs[len(errs)-1]
}
}
} else {
result.Error = "unknown: " + err.Error()
}
result.HttpStatus = -2
result.Stime = time.Now().Sub(startTime) / time.Millisecond
return result
}
resp.Body.Close()
result.HttpStatus = resp.StatusCode
result.Error = "noerror"
result.Stime = time.Now().Sub(startTime) / time.Millisecond //spend time (ms)
return result
}
func setRlimit() {
var rLimit syscall.Rlimit
err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit)
if err != nil {
log.Printf("Unable to obtain rLimit", err)
}
if rLimit.Cur < rLimit.Max {
rLimit.Max = 9999
rLimit.Cur = 9999
err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rLimit)
if err != nil {
log.Printf("Unable to increase number of open files limit", err)
}
}
}
func main() {
setRlimit()
s := &http.Server{
Addr: ":59059",
ReadTimeout: 7 * time.Second,
WriteTimeout: 7 * time.Second,
}
http.HandleFunc("/http", MonitorHttp)
log.Fatal(s.ListenAndServe())
}
这里有几个要点。
我无法重现您的行为,无论如何,can't identify protocol
通常与未正确关闭的套接字有关。
一些评论者建议您不必在每个处理程序中创建http客户端,这是真的。只需创建一次并重复使用即可。
其次,我不确定为什么要创建自己的http.Client
结构,以及为什么要禁用keepalives。你就不能选择http.Get
吗?更简单的代码更容易调试。
第三,不确定为什么要重写transport.Dial
函数。即使你必须这样做,文档(Go 1.9.2)上写着:
% go doc http.transport.dial
type Transport struct {
// Dial specifies the dial function for creating unencrypted TCP
connections.
//
// Deprecated: Use DialContext instead, which allows the transport
// to cancel dials as soon as they are no longer needed.
// If both are set, DialContext takes priority.
Dial func(network, addr string) (net.Conn, error)
关于弃用和缺少拨号盘重用的评论可能会指出问题的根源。
总之,当我站在你的立场上时,我会做两件事:
- 将客户端创建移到执行一次的代码中,或者只使用默认客户端
http.Get
- 我会通过覆盖默认传输字段来清理这件事,如果必须这样做,那么我会按照建议使用
DialContext
祝你好运。
我无法重现该问题。但这是我的2美分(并非双关语)
- 在一篇文章中注意到的SockJS节点中发现了Simmilar问题https://idea.popcount.org/2012-12-09-lsof-cant-identify-protocol/根据这个问题,在FreeBSD上观察到了。但问题是"网络套接字没有得到妥善清理"
- 另一个测试,我希望你们做,若你们仍然有手在相同的环境。如果可能,张贴wireshark日志。只是为了确认网络帧中没有细微的事情可能导致了这种情况
很抱歉,我不能仅仅为了重现这个问题而安装Go 1.5.1。希望这有帮助。