Go 在 http.Get 之后不会释放内存



我正在使用一个简单的线程池加载网页,同时从文件中动态读取 URL。但是这个小程序会慢慢地占满服务器的全部内存,直到 OOM killer 将它终止。看起来 resp.Body.Close() 并没有释放正文文本占用的内存(内存占用 ≈ 已下载页面数 × 平均页面大小)。我怎样才能强制 Go 释放分配给响应正文 HTML 文本的内存?

package main
import (
    "bufio"
    "fmt"
    "io/ioutil"
    "net/http"
    "os"
    "strings"
    "sync"
)
// worker consumes URLs from linkChan until the channel is closed. For each
// URL it downloads the page and prints one line to stdout:
//
//	Fail url: <url>            when the request or the body read fails
//	Done url: <url>\t<bool>    otherwise, where <bool> reports whether the
//	                           body references the Google AdServices
//	                           conversion script
//
// wg.Done is called when the worker returns.
//
// NOTE: requests go through http.Get, i.e. the shared default client. This is
// the variant whose memory use grows when many different hosts are crawled,
// because connections are kept around for reuse.
func worker(linkChan chan string, wg *sync.WaitGroup) {
    defer wg.Done()
    for url := range linkChan {
        // Getting body text
        resp, err := http.Get(url)
        if err != nil {
            fmt.Printf("Fail url: %s\n", url)
            continue
        }
        body, err := ioutil.ReadAll(resp.Body)
        // Close right away instead of deferring: a defer inside the loop
        // would keep every body open until the worker exits.
        resp.Body.Close()
        if err != nil {
            fmt.Printf("Fail url: %s\n", url)
            continue
        }
        // Test page body
        hasRemCode := strings.Contains(string(body), "googleadservices.com/pagead/conversion.js")
        fmt.Printf("Done url: %s\t%t\n", url, hasRemCode)
    }
}
// main starts a fixed pool of workers, feeds them one URL per line read from
// ./tmp/new.csv, and waits for all of them to finish.
//
// It panics if the input file cannot be opened. A read error that interrupts
// the input is reported on stderr after the already-queued URLs have been
// processed.
func main() {
    const numWorkers = 30

    // Creating worker pool
    lCh := make(chan string, numWorkers)
    var wg sync.WaitGroup
    for i := 0; i < numWorkers; i++ {
        wg.Add(1)
        go worker(lCh, &wg)
    }

    // Opening file with urls
    file, err := os.Open("./tmp/new.csv")
    if err != nil {
        panic(err)
    }
    // Register the close only once the open is known to have succeeded.
    defer file.Close()

    // Processing urls. Scanner yields whole lines; ReadLine would hand back
    // an over-long line in several pieces, each of which would be queued as
    // if it were a separate URL.
    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        lCh <- scanner.Text()
    }
    close(lCh)
    wg.Wait()

    if err := scanner.Err(); err != nil {
        fmt.Fprintf(os.Stderr, "reading urls: %v\n", err)
    }
}

下面是pprof工具的一些输出:

      flat  flat%   sum%        cum   cum%
   34.63MB 29.39% 29.39%    34.63MB 29.39%  bufio.NewReaderSize
      30MB 25.46% 54.84%       30MB 25.46%  net/http.(*Transport).getIdleConnCh
   23.09MB 19.59% 74.44%    23.09MB 19.59%  bufio.NewWriter
   11.63MB  9.87% 84.30%    11.63MB  9.87%  net/http.(*Transport).putIdleConn
    6.50MB  5.52% 89.82%     6.50MB  5.52%  main.main

看起来像这个问题,但它在2年前修复了

在golang-nuts的这个帖子里找到了答案。http.Transport在请求同一主机的情况下保存连接以供将来重用,在我的情况下(成千上万个不同的主机)会导致内存膨胀。但是禁用keepalive完全解决了这个问题。

工作代码:

// worker consumes URLs from linkChan until the channel is closed. For each
// URL it downloads the page and prints one line to stdout:
//
//	Fail url: <url>            when the request or the body read fails
//	Done url: <url>\t<bool>    otherwise, where <bool> reports whether the
//	                           body references the Google AdServices
//	                           conversion script
//
// wg.Done is called when the worker returns.
//
// Each worker uses its own client whose transport has keep-alives disabled,
// so connections are not retained for reuse after a response has been read.
// That retention is what made memory grow when crawling many distinct hosts.
func worker(linkChan chan string, wg *sync.WaitGroup) {
    defer wg.Done()
    var transport http.RoundTripper = &http.Transport{
        DisableKeepAlives: true,
    }
    c := &http.Client{Transport: transport}
    for url := range linkChan {
        // Getting body text
        resp, err := c.Get(url)
        if err != nil {
            fmt.Printf("Fail url: %s\n", url)
            continue
        }
        body, err := ioutil.ReadAll(resp.Body)
        // Close right away instead of deferring: a defer inside the loop
        // would keep every body open until the worker exits.
        resp.Body.Close()
        if err != nil {
            fmt.Printf("Fail url: %s\n", url)
            continue
        }
        // Test page body
        hasRemCode := strings.Contains(string(body), "googleadservices.com/pagead/conversion.js")
        fmt.Printf("Done url: %s\t%t\n", url, hasRemCode)
    }
}

最新更新