我正在使用一个简单的线程池加载网页,同时从文件中动态读取 URL。但是这个小程序占用的内存会慢慢增长,直到耗尽服务器的全部内存并被 OOM killer 终止。看起来 resp.Body.Close() 并没有释放响应正文占用的内存(内存占用 ≈ 已下载页面数 × 平均页面大小)。我怎样才能强制 Go 释放分配给正文 HTML 文本的内存?
package main
import (
"bufio"
"fmt"
"io/ioutil"
"net/http"
"os"
"strings"
"sync"
)
// worker receives URLs from linkChan until the channel is closed. For each
// URL it downloads the page, checks whether the body references the Google
// AdServices conversion script, and prints one result line to stdout:
//
//	Done url: <url>\t<true|false>
//	Fail url: <url>
//
// wg.Done is called when the channel has been drained, so the caller can wait
// for the whole pool with wg.Wait.
//
// NOTE(review): http.Get goes through http.DefaultClient, whose transport
// keeps idle connections around for reuse. When crawling a very large number
// of distinct hosts, a dedicated client with keep-alives disabled may be
// needed to keep memory bounded.
func worker(linkChan chan string, wg *sync.WaitGroup) {
	defer wg.Done()

	const marker = "googleadservices.com/pagead/conversion.js"

	for url := range linkChan {
		// Getting body text
		resp, err := http.Get(url)
		if err != nil {
			fmt.Printf("Fail url: %s\n", url)
			continue
		}
		body, err := ioutil.ReadAll(resp.Body)
		// Close right away instead of deferring: a defer inside this loop
		// would only run once the channel is closed.
		resp.Body.Close()
		if err != nil {
			fmt.Printf("Fail url: %s\n", url)
			continue
		}

		// Test page body
		hasRemCode := strings.Contains(string(body), marker)
		fmt.Printf("Done url: %s\t%t\n", url, hasRemCode)
	}
}
// main reads URLs, one per line, from ./tmp/new.csv and feeds them to a fixed
// pool of worker goroutines through a buffered channel, then waits for the
// pool to finish.
//
// The program panics if the input file cannot be opened. A read error in the
// middle of the file stops feeding URLs; it is reported on stderr after the
// already-queued URLs have been processed.
func main() {
	const numWorkers = 30

	// Opening file with urls. Done before any goroutine is started so that a
	// missing file fails fast, and the Close is deferred only after the
	// handle is known to be valid.
	file, err := os.Open("./tmp/new.csv")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// Creating worker pool
	lCh := make(chan string, numWorkers)
	wg := new(sync.WaitGroup)
	for i := 0; i < numWorkers; i++ {
		wg.Add(1)
		go worker(lCh, wg)
	}

	// Processing urls. Scanner yields whole lines (up to its 64 KiB token
	// limit), whereas Reader.ReadLine hands back long lines in fragments.
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		href := scanner.Text()
		if href == "" {
			// A blank line can never be a fetchable URL.
			continue
		}
		lCh <- href
	}

	// Closing the channel lets every worker's range loop terminate.
	close(lCh)
	wg.Wait()

	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "reading urls: %v\n", err)
	}
}
下面是pprof工具的一些输出:
flat flat% sum% cum cum%
34.63MB 29.39% 29.39% 34.63MB 29.39% bufio.NewReaderSize
30MB 25.46% 54.84% 30MB 25.46% net/http.(*Transport).getIdleConnCh
23.09MB 19.59% 74.44% 23.09MB 19.59% bufio.NewWriter
11.63MB 9.87% 84.30% 11.63MB 9.87% net/http.(*Transport).putIdleConn
6.50MB 5.52% 89.82% 6.50MB 5.52% main.main
看起来像这个问题,但它在2年前修复了
我在 golang-nuts 的这个帖子里找到了答案。http.Transport 会保存连接,以便将来请求同一主机时重用;
在我的场景下(成千上万个不同的主机),这会导致内存膨胀。禁用 keep-alive 之后,问题就完全解决了。
// worker receives URLs from linkChan until the channel is closed. For each
// URL it downloads the page, checks whether the body references the Google
// AdServices conversion script, and prints one result line to stdout:
//
//	Done url: <url>\t<true|false>
//	Fail url: <url>
//
// Each worker owns its own http.Client whose transport has keep-alives
// disabled, so connections are not retained for reuse after a response has
// been consumed.
//
// wg.Done is called when the channel has been drained, so the caller can wait
// for the whole pool with wg.Wait.
func worker(linkChan chan string, wg *sync.WaitGroup) {
	defer wg.Done()

	const marker = "googleadservices.com/pagead/conversion.js"

	c := &http.Client{
		Transport: &http.Transport{DisableKeepAlives: true},
	}

	for url := range linkChan {
		// Getting body text
		resp, err := c.Get(url)
		if err != nil {
			fmt.Printf("Fail url: %s\n", url)
			continue
		}
		body, err := ioutil.ReadAll(resp.Body)
		// Close right away instead of deferring: a defer inside this loop
		// would only run once the channel is closed.
		resp.Body.Close()
		if err != nil {
			fmt.Printf("Fail url: %s\n", url)
			continue
		}

		// Test page body
		hasRemCode := strings.Contains(string(body), marker)
		fmt.Printf("Done url: %s\t%t\n", url, hasRemCode)
	}
}