用gzip格式写数据的正确方法是什么?



我的应用程序产生了大量的文本数据,为了减少磁盘消耗,我想用gzip格式写入数据

多个例程同时调用WriteData()函数

但是linux gzip抱怨文件损坏

zcat ./2021-08-11-00.gz > /dev/null
gzip: ./2021-08-11-00.gz: invalid compressed data--format violated

它不是每次都发生,但大约每第二次写入文件就会发生一次。

我的代码有什么问题?

我的DataWrite包看起来像

package storage
import (
"compress/gzip"
"os"
"sync"
"github.com/rs/zerolog/log"
)
type Storage struct {
handle *os.File
writer *gzip.Writer
lock sync.Mutex
}
func (s *Storage) Init(filename string) error {
file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return err
}
s.handle = file
s.writer = gzip.NewWriter(file)
return nil
}
func (s *Storage) Shutdown() {
if err := s.writer.Close(); err != nil {
log.Warn().Err(err).Msg("Gzip writer close failed")
}
if err := s.handle.Close(); err != nil {
log.Warn().Err(err).Msg("Gzip handle close failed")
}
}
func (s *Storage) WriteData(data *MyStruct) error {
s.lock.Lock()
defer s.lock.Unlock()
buffer := data.content
_, err := s.writer.Write([]byte(buffer))
if err != nil {
log.Warn().Err(err).Msg("Gzip write failed")
return err
}
if err := s.writer.Flush(); err != nil {
return err
}
if err := s.handle.Sync(); err != nil {
return err
}
return nil
}

您没有同步关机和写入。

package storage
type Storage struct {
handle *os.File
writer *gzip.Writer
lock sync.Mutex
}
func (s *Storage) Init(filename string) {
file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return err
}
s.handle = file
s.writer = gzip.NewWriter(file)
}
func (s *Storage) Shutdown() {
s.lock.Lock() // Here !!
defer s.lock.Unlock()
if err := s.writer.Close(); err != nil {
log.Warn().Err(err).Str("fileName", path).Msg("Gzip writer close failed")
}
if err := s.handle.Close(); err != nil {
log.Warn().Err(err).Str("fileName", path).Msg("Gzip handle close failed")
}
}
func (s *Storage) WriteData(data *MyStruct) error {
s.lock.Lock()
defer s.lock.Unlock()
cnt, err := s.writer.Write([]byte(buffer))
if err != nil {
log.Warn().Err(err).Msg("Gzip write failed")
return err
}
if err := s.writer.Flush(); err != nil {
return err
}
if err := s.handle.Sync(); err != nil {
return err
}
return nil
}

这里可以看到gzip压缩的工作代码:

package main
import (
"compress/gzip"
"log"
"os"
"time"
"sync"
)

type Storage struct {
handle *os.File
writer *gzip.Writer
buffer []byte
lock sync.Mutex
Name string
Comment string
ModTime time.Time
}
func (s *Storage) Init(filename string) {
file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Fatal(err)
}
s.handle = file
s.writer = gzip.NewWriter(file)
s.Name = "a-new-hope.txt"
s.Comment = "an epic space opera by George Lucas"
s.ModTime = time.Date(1977, time.May, 25, 0, 0, 0, 0, time.UTC)
s.buffer = []byte("Hello")
}
func (s *Storage) Shutdown() {
if err := s.writer.Close(); err != nil {
log.Fatal("Gzip writer close failed")
}
if err := s.handle.Close(); err != nil {
log.Fatal("Gzip writer close failed")
}
}
func (s *Storage) WriteData() error {
s.lock.Lock()
defer s.lock.Unlock()
_, err := s.writer.Write([]byte(s.buffer))
if err != nil {
log.Fatal("Gzip write failed")
return err
}
if err := s.writer.Flush(); err != nil {
return err
}
if err := s.handle.Sync(); err != nil {
return err
}
return nil
}
func main() {
//WriteGzip("test.gzip", "My data")
s := Storage{};
s.Init("sss.gzip");
s.WriteData();
s.Shutdown();
}

编辑经过修改,使其与所讨论的代码相似,并进行了少量更改。WriteData从存储结构中获取缓冲区作为MyStruct,在代码中没有。

相关内容

最新更新