将文件分割成大于2gb的块



我正试图编写一个方法,将文件分成固定大小的块,但我不能超过2147483590 (Integer)的限制。MaxValue - 57)每个块创建缓冲区时,因为Byte构造器只接受一个整数。

我在其他S.O.回答中读过一个建议,谈到创建小块(例如:100 mb),然后附加块以获得真正所需的GB块大小,但我不知道这是否是正确的方式或如何"附加"块。

有人能帮我吗?,下面是我所做的:

Public Sub SplitFile(ByVal InputFile As String,
                     ByVal ChunkSize As Long,
                     Optional ByVal ChunkName As String = Nothing,
                     Optional ByVal ChunkExt As String = Nothing,
                     Optional ByVal Overwrite As Boolean = False)
    ' FileInfo instance of the input file.
    Dim fInfo As New IO.FileInfo(InputFile)
    ' The total amount of chunks to create.
    Dim ChunkCount As Integer = CInt(Math.Floor(fInfo.Length / ChunkSize))
    ' The remaining bytes of the last chunk.
    Dim LastChunkSize As Long = fInfo.Length - (ChunkCount * ChunkSize)
    ' The Buffer to read the chunks.
    Dim ChunkBuffer As Byte() = New Byte(ChunkSize - 1L) {}
    ' The Buffer to read the last chunk.
    Dim LastChunkBuffer As Byte() = New Byte(LastChunkSize - 1L) {}
    ' A zero-filled string to enumerate the chunk files.
    Dim Zeros As String = String.Empty
    ' The given filename for each chunk.
    Dim ChunkFile As String = String.Empty
    ' The chunk file basename.
    ChunkName = If(String.IsNullOrEmpty(ChunkName),
                   IO.Path.Combine(fInfo.DirectoryName, IO.Path.GetFileNameWithoutExtension(fInfo.Name)),
                   IO.Path.Combine(fInfo.DirectoryName, ChunkName))
    ' The chunk file extension.
    ChunkExt = If(String.IsNullOrEmpty(ChunkExt),
                  fInfo.Extension.Substring(1I),
                  ChunkExt)
    ' If ChunkSize is bigger than filesize then...
    If ChunkSize >= fInfo.Length Then
        Throw New OverflowException("'ChunkSize' should be smaller than the Filesize.")
        Exit Sub
        ' ElseIf ChunkSize > 2147483590I Then ' (Integer.MaxValue - 57)
        '     Throw New OverflowException("'ChunkSize' limit exceeded.")
        '    Exit Sub
    End If ' ChunkSize <>...
    ' If not file-overwrite is allowed then...
    If Not Overwrite Then
        For ChunkIndex As Integer = 0I To (ChunkCount)
            Zeros = New String("0", CStr(ChunkCount).Length - CStr(ChunkIndex + 1).Length)
            ' If chunk file already exists then...
            If IO.File.Exists(String.Format("{0}.{1}.{2}", ChunkName, Zeros & CStr(ChunkIndex + 1I), ChunkExt)) Then
                Throw New IO.IOException(String.Format("File already exist: {0}", ChunkFile))
                Exit Sub
            End If ' IO.File.Exists
        Next ChunkIndex
    End If ' Overwrite
    ' Open the file to start reading bytes.
    Using InputStream As New IO.FileStream(fInfo.FullName, IO.FileMode.Open)
        Using BinaryReader As New IO.BinaryReader(InputStream)
            BinaryReader.BaseStream.Seek(0L, IO.SeekOrigin.Begin)
            For ChunkIndex As Integer = 0I To ChunkCount
                Zeros = New String("0", CStr(ChunkCount).Length - CStr(ChunkIndex + 1).Length)
                ChunkFile = String.Format("{0}.{1}.{2}", ChunkName, Zeros & CStr(ChunkIndex + 1I), ChunkExt)
                If ChunkIndex <> ChunkCount Then ' Read the ChunkSize bytes.
                    InputStream.Position = (ChunkSize * CLng(ChunkIndex))
                    BinaryReader.Read(ChunkBuffer, 0I, ChunkSize)
                Else ' Read the remaining bytes of the LastChunkSize.
                    InputStream.Position = (ChunkSize * ChunkIndex) + 1
                    BinaryReader.Read(LastChunkBuffer, 0I, LastChunkSize)
                End If ' ChunkIndex <> ChunkCount
                ' Create the chunk file to Write the bytes.
                Using OutputStream As New IO.FileStream(ChunkFile, IO.FileMode.Create)
                    Using BinaryWriter As New IO.BinaryWriter(OutputStream)
                        If ChunkIndex <> ChunkCount Then
                            BinaryWriter.Write(ChunkBuffer)
                        Else
                            BinaryWriter.Write(LastChunkBuffer)
                        End If
                        OutputStream.Flush()
                    End Using ' BinaryWriter
                End Using ' OutputStream
                ' Report the progress...
                ' RaiseEvent ProgressChanged(CDbl((100I / ChunkCount) * ChunkIndex))
            Next ChunkIndex
        End Using ' BinaryReader
    End Using ' InputStream
End Sub

重新考虑你的方法。分割文件只需要一个小缓冲区。最多以1MB的块进行读写。不需要更多了。使用您的方法,您可以一次在RAM中缓冲2GB,但不需要缓冲整个块。只需跟踪对每个文件块的读和写的总字节数。

从技术上讲,你可以让它工作在一个单字节缓冲区,虽然这将是低效的。

如果您真的想要调优性能,请尝试使用循环缓冲区或具有独立读写线程的单独缓冲区来重叠IO,这样您就可以并行地读写。一旦读操作填满了一个缓冲区,就可以让写线程开始写,而读线程继续使用另一个缓冲区。这个想法是为了避免串行"锁步骤"使用单个缓冲区。

正如我在评论中所写的那样,您可以将数据写入块,直到它们的大小足够大。读取是用一个较小的缓冲区(我从你的问题中取了一些代码部分)在循环中完成的,同时计算已经写了多少字节。

' Open the file to start reading bytes.
Using InputStream As New IO.FileStream(fInfo.FullName, IO.FileMode.Open)
    Using BinaryReader As New IO.BinaryReader(InputStream)
        Dim OneMegabyte As Integer = 1024 * 1024 'Defines length of one MB
        'Account for cases where a chunksize smaller than one MegaByte is requested
        Dim BufferSize As Integer
        If ChunkSize < OneMegabyte Then
           BufferSize = CInt(ChunkSize)
        Else
           BufferSize = OneMegabyte
        End If
        Dim BytesWritten As Long = 0 'Counts the length of the current file
        Dim ChunkIndex As Integer = 0 'Keep track of the number of chunks
        While InputStream.Position < InputStream.Length
            ChunkFile = String.Format("{0}.{1}.{2}", ChunkName, Zeros & CStr(ChunkIndex + 1I), ChunkExt) 'Define filename
            BytesWritten = 0 'Reset length counter
            ' Create the chunk file to Write the bytes.
            Using OutputStream As New IO.FileStream(ChunkFile, IO.FileMode.Create)
                Using BinaryWriter As New IO.BinaryWriter(OutputStream)
                    While BytesWritten < ChunkSize AndAlso InputStream.Position < InputStream.Length 'Read until you have reached the end of the input
                        Dim ReadBytes() As Byte = BinaryReader.ReadBytes(BufferSize) 'Read one megabyte
                        BinaryWriter.Write(ReadBytes) 'Write this megabyte
                        BytesWritten += ReadBytes.Count 'Increment size counter
                    End While
                    OutputStream.Flush()
                End Using ' BinaryWriter
            End Using ' OutputStream
            ChunkIndex += 1 'Increment file counter
        End While

    End Using ' BinaryReader
End Using ' InputStream

相关内容

  • 没有找到相关文章

最新更新