我正在用go编写一个蜘蛛,到目前为止有以下代码:
import (
"fmt"
"math/rand"
"net/http"
"runtime"
"time"
"golang.org/x/net/html"
)
func handleEnd() {
//handle end (close channels, etc)
fmt.Println("Placeholder")
}
func worker(c chan string) {
var url string
url = <-c
fmt.Println(url)
//get the url
resp, err := http.Get(url)
defer resp.Body.Close()
if err != nil {
fmt.Println(err)
//we should log this incase this is in the background
}
//operate on it
//find links
z := html.NewTokenizer(resp.Body)
for {
tt := z.Next()
switch {
case tt == html.ErrorToken:
// End of the document, we're done
return
case tt == html.StartTagToken:
t := z.Token()
isAnchor := t.Data == "a"
if isAnchor {
fmt.Println("We found a link!")
}
}
}
//handle local links()
//send links to channel
//}
}
func genStartUrls(howMany, length int) []string {
var ans []string
var letters = []rune("abcdefghijklmnopqrstuvwxyz")
rand.Seed(time.Now().UnixNano())
for i := 0; i < howMany; i++ {
b := make([]byte, length)
for i := range b {
b[i] = byte(letters[rand.Intn(len(letters))])
}
ans = append(ans, "http://"+string(b)+".com")
}
return ans
}
func main() {
defer handleEnd()
workers := 3
l := make(chan string, 10000)
defer close(l)
for _, url := range genStartUrls(100, 3) {
l <- url
}
for i := 0; i < workers; i++ {
go worker(l)
}
for {
runtime.Gosched()
}
}
起初,它按预期工作(它从页面中找到 hrefs(,但是当我破坏它并再次运行它,然后再做几次同样的事情时,我收到以下错误:
[signal SIGSEGV: segmentation violation code=0x1 addr=0x40 pc=0x6454f1]
goroutine 7 [running]:
main.worker(0xc0000662a0)
/home/name/go/src/spider/main.go:25 +0x121
created by main.main
/home/name/go/src/spider/main.go:85 +0x138
exit status 2
当我重新启动计算机时,问题在运行几次程序后消失,但随后又回来了。这让我觉得goroutines可能不会在我的主程序结束时结束,而是作为僵尸进程继续。如何修复此错误?
程序结束后,Goroutines 无法继续。
以下几行可能是问题的根源:
resp, err := http.Get(url)
defer resp.Body.Close()
if err != nil {
您必须在错误检查后移动defer
,因为如果出现错误,resp
将为 nil,defer resp.Body.Close()
将失败。
resp, err := http.Get(url)
if err != nil {
...
return err
}
defer resp.Body.Close()