当我下载大文件时,kafka会重试很多次



我是 Kafka 新手,正在尝试构建一个发送带附件邮件的服务。执行流程:

  • 卡夫卡将收到一条发送邮件的消息
  • 函数get file将从url下载文件,缩放图像并保存文件
  • 发送邮件时,我将从文件夹中获取文件并附加到表单中

问题:
  • 当我多次发送带有大文件的邮件时,kafka重试多次,我会收到很多邮件

Kafka 错误:"kafka server: The provided member is not known in the current generation"(所提供的成员在当前这一代 generation 中是未知的)

我了解过 MaxProcessingTime 这个配置,但当我用大文件单独测试发送邮件时,它仍然可以正常工作

Kafka 信息:1 个 broker,3 个消费者

// SendPODMail downloads the POD images for the order and sends them to the
// customer as mail attachments.
//
// A failed download is retried up to NUM_OF_RETRY more times. The error
// returned is the result of the mail send, so the caller can tell whether the
// mail actually went out.
func (s *customerMailService) SendPODMail() error {
	filePaths, err := DownloadFiles(podURLs, orderInfo.OrderCode)
	// One initial attempt plus at most NUM_OF_RETRY retries; the loop stops as
	// soon as a download succeeds.
	for retry := 0; err != nil && retry < NUM_OF_RETRY; retry++ {
		filePaths, err = DownloadFiles(podURLs, orderInfo.OrderCode)
	}

	// NOTE(review): when every attempt fails, filePaths is nil and the mail is
	// still sent without attachments, as before. Confirm this is intended;
	// otherwise return the download error here.
	return s.sendMailService.Send(ctx, orderInfo.CustomerEmail, tmsPod, content, filePaths)
}

下载文件的函数:

// DownloadFiles downloads every URL in files, resizes each image to a width of
// 1024 pixels (height scaled proportionally) and stores the result below
// tempDir/<orderCode>.
//
// It returns the saved file paths in the same order as files. The first
// failure aborts the batch and returns a nil slice together with the error;
// images already saved for earlier URLs are left on disk.
func DownloadFiles(files []string, orderCode string) ([]string, error) {
	if err := os.Mkdir(tempDir, 0750); err != nil && !os.IsExist(err) {
		return nil, err
	}
	tempDirPath := filepath.Join(tempDir, orderCode)
	if err := os.Mkdir(tempDirPath, 0750); err != nil && !os.IsExist(err) {
		return nil, err
	}

	var filePaths []string
	for _, fileUrl := range files {
		// The per-file work lives in a helper so its deferred cleanup runs at
		// the end of each iteration rather than when the whole batch finishes.
		filePath, err := downloadPodImage(fileUrl, tempDirPath, orderCode)
		if err != nil {
			return nil, err
		}
		filePaths = append(filePaths, filePath)
	}
	return filePaths, nil
}

// downloadPodImage fetches a single image from fileUrl, resizes it and saves it
// in a fresh temporary directory under tempDirPath. It returns the path of the
// saved image. Only responses whose Content-Type header is exactly "image/png"
// or "image/jpeg" are accepted. orderCode is used for log messages only.
func downloadPodImage(fileUrl, tempDirPath, orderCode string) (string, error) {
	fileUrlParsed, err := url.ParseRequestURI(fileUrl)
	if err != nil {
		logrus.WithError(err).Infof("Pod url is invalid %s", orderCode)
		return "", err
	}
	extFile := filepath.Ext(fileUrlParsed.Path)

	dir, err := os.MkdirTemp(tempDirPath, "tempDir")
	if err != nil {
		return "", err
	}
	saved := false
	defer func() {
		if !saved {
			// Best-effort cleanup so failed attempts (and their retries) do
			// not pile up empty directories; the original error is what the
			// caller needs to see.
			_ = os.RemoveAll(dir)
		}
	}()

	// CreateTemp is used only to reserve a unique file name. The handle is
	// closed right away because imaging.Save opens the file by path itself.
	f, err := os.CreateTemp(dir, "tmpfile-*"+extFile)
	if err != nil {
		return "", err
	}
	filePath := f.Name()
	if err := f.Close(); err != nil {
		return "", err
	}

	response, err := http.Get(fileUrl)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	isTypeAllow := false
	for _, contentType := range response.Header["Content-Type"] {
		if contentType == "image/png" || contentType == "image/jpeg" {
			isTypeAllow = true
			break
		}
	}
	if !isTypeAllow {
		logrus.Infof("Pod image type is invalid %s", orderCode)
		return "", errors.New("Pod image type is invalid")
	}

	decodedImg, err := imaging.Decode(response.Body)
	if err != nil {
		return "", err
	}
	resizedImg := imaging.Resize(decodedImg, 1024, 0, imaging.Lanczos)
	// A failed save must be reported; otherwise the caller would attach an
	// empty file to the mail.
	if err := imaging.Save(resizedImg, filePath); err != nil {
		return "", err
	}

	saved = true
	return filePath, nil
}

发送邮件的函数:

// SendFile sends a templated email with attachments by POSTing a
// multipart/form-data request to the mail service's /v1/emails endpoint.
//
// The form carries the fields "template" and "to", an optional "params" field
// holding data encoded as JSON (omitted when data is nil), and one file part
// per entry in filePaths. The whole form is buffered in memory before it is
// sent.
//
// It returns an error if the form cannot be built, an attachment cannot be
// read, the request fails, or the service answers with a status other than
// 200 OK.
func (s *tikiMailService) SendFile(ctx context.Context, receiver string, templateCode string, data interface{}, filePaths []string) error {
	path := "/v1/emails"
	fullPath := fmt.Sprintf("%s%s", s.host, path)
	formValue := &bytes.Buffer{}
	writer := multipart.NewWriter(formValue)

	if err := writer.WriteField("template", templateCode); err != nil {
		return err
	}
	if err := writer.WriteField("to", receiver); err != nil {
		return err
	}
	if data != nil {
		b, err := json.Marshal(data)
		if err != nil {
			return errors.Wrapf(err, "Cannot marshal mail data to json with object %+v", data)
		}
		if err := writer.WriteField("params", string(b)); err != nil {
			return err
		}
	}

	for _, filePath := range filePaths {
		if err := attachFormFile(writer, filePath); err != nil {
			return errors.Wrapf(err, "Cannot attach file %s", filePath)
		}
	}
	if err := writer.Close(); err != nil {
		return err
	}

	request, err := http.NewRequest("POST", fullPath, formValue)
	if err != nil {
		return err
	}
	if ctx != nil {
		// Tie the request to the caller's context so cancellation and
		// deadlines apply; a nil context keeps the previous behavior.
		request = request.WithContext(ctx)
	}
	request.Header.Set("Content-Type", writer.FormDataContentType())

	resp, err := s.doer.Do(request)
	if err != nil {
		return errors.Wrap(err, "Cannot send request to send email")
	}
	defer resp.Body.Close()

	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	if resp.StatusCode != http.StatusOK {
		return errors.New(fmt.Sprintf("Send email with code %s error: status code %d, response %s",
			templateCode, resp.StatusCode, string(b)))
	}

	logrus.Infof("Send email with attachment ,code %s success with response %s , files %v", templateCode, string(b), filePaths)
	return nil
}

// attachFormFile adds the file at filePath to the multipart form as a file
// part. The form field name is the full path and the file name is its base
// name. Any error opening or copying the file is returned, so an unreadable
// attachment is never sent as an empty part.
func attachFormFile(writer *multipart.Writer, filePath string) error {
	file, err := os.Open(filePath)
	if err != nil {
		return err
	}
	defer file.Close()

	part, err := writer.CreateFormFile(filePath, filepath.Base(filePath))
	if err != nil {
		return err
	}
	_, err = io.Copy(part, file)
	return err
}

感谢

我的团队发现,问题出现在重新部署 k8s pod 的时候:这会引发分区 leader 冲突,从而触发重新平衡(rebalance)。重新平衡之后,pod 缓冲区中尚未处理的剩余消息会被再次处理。

解决方案:不再一次拉取多条消息保存在缓冲区中,而是每次只获取并处理一条消息,通过以下配置实现:

ChannelBufferSize = 0

分区 leader 冲突的示例:

consumer A and B startup in the same time
consumer A registers itself as leader, and owns the topic with all partitions
consumer B registers itself as leader, and then begins to rebalance and owns all partitions
consumer A rebalance and obtains all partitions, but can not consume because the memberId is old and need a new one
consumer B rebalance again and owns the topic with all partitions, but it's already obtained by consumer A

我的两分钱:如果是非常大的附件,消费者需要花费相当多的时间来读取文件并将其作为附件发送。

这会增加两次 poll() 调用之间的时间间隔。如果该间隔大于 max.poll.interval.ms,消费者就会被认为已失败,分区偏移量也不会被提交。因此,消息会被再次处理;直到某次执行时间恰好低于轮询间隔,偏移量才会被提交。其结果就是同一封电子邮件被发送多次。

尝试增加消费者端的max.poll.interval.ms

最新更新