使用 Backblaze B2 Native API(PHP)分片上传大文件时出现 “Checksum did not match data received”(校验和与接收到的数据不匹配)错误



我使用以下文档从PHP调用Backblaze B2 API: https://www.backblaze.com/b2/docs/b2_upload_part.html

代码如下:

<?php
/**
 * Read callback registered with curl through CURLOPT_READFUNCTION.
 *
 * Hands back the next $length bytes from wherever the stream pointer
 * currently is; it keeps no state of its own.
 *
 * @param mixed    $curl_rsrc    Curl handle passed in by curl (not used here).
 * @param resource $file_pointer Stream to read from.
 * @param int      $length       Maximum number of bytes to return.
 * @return string|false Result of fread() on the stream.
 */
function myReadFile($curl_rsrc, $file_pointer, $length) {
    $buffer = fread($file_pointer, $length);

    return $buffer;
}
// Upload parts
//
// Splits $local_file into parts of $minimum_part_size bytes (the last part may
// be shorter), computes the SHA1 of each part and POSTs it to $upload_url.
// $upload_url and $large_file_auth_token are not defined in this snippet and
// must already be set by the caller.
$minimum_part_size = 100 * (1000 * 1000); // Obtained from b2_authorize_account. The default is 100 MB
$local_file = "<path to large file>";
$local_file_size = filesize($local_file);
$total_bytes_sent = 0;
$bytes_sent_for_part = $minimum_part_size;
$sha1_of_parts = [];
$part_no = 1;
$file_handle = fopen($local_file, "r");
while ($total_bytes_sent < $local_file_size) {
    // Determine the number of bytes to send based on the minimum part size
    if (($local_file_size - $total_bytes_sent) < $minimum_part_size) {
        $bytes_sent_for_part = ($local_file_size - $total_bytes_sent);
    }

    // Get a sha1 of the part we are going to send, then rewind to the start
    // of the part so curl reads the same bytes again.
    fseek($file_handle, $total_bytes_sent);
    $data_part = fread($file_handle, $bytes_sent_for_part);
    array_push($sha1_of_parts, sha1($data_part));
    fseek($file_handle, $total_bytes_sent);

    // Send it over the wire
    $session = curl_init($upload_url);

    // Add headers
    $headers = array();
    $headers[] = "Accept: application/json";
    $headers[] = "Authorization: " . $large_file_auth_token;
    $headers[] = "Content-Length: " . $bytes_sent_for_part;
    $headers[] = "X-Bz-Part-Number: " . $part_no;
    $headers[] = "X-Bz-Content-Sha1: " . $sha1_of_parts[$part_no - 1];
    curl_setopt($session, CURLOPT_POST, true);
    curl_setopt($session, CURLOPT_HTTPHEADER, $headers);  // Add headers
    curl_setopt($session, CURLOPT_INFILE, $file_handle);
    curl_setopt($session, CURLOPT_INFILESIZE, (int)$bytes_sent_for_part);
    curl_setopt($session, CURLOPT_RETURNTRANSFER, true); // Receive server response
    // NOTE(review): myReadFile is not told where this part ends, so nothing in
    // this loop stops curl from reading past $bytes_sent_for_part. That is the
    // likely cause of a checksum mismatch on the server side; confirm.
    curl_setopt($session, CURLOPT_READFUNCTION, "myReadFile");
    $server_output = curl_exec($session);
    curl_close($session);
    print $server_output . "\n";

    // Prepare for the next iteration of the loop
    $part_no++;
    $total_bytes_sent = $bytes_sent_for_part + $total_bytes_sent;
}
fclose($file_handle);
?>

服务器返回了如下响应:

{
"code": "bad_request",
"message": "Checksum did not match data received",
"status": 400
}

文档中写道:

CURLOPT_INFILESIZE

上传文件到远程站点时期望的文件大小,单位为字节。注意,使用这个选项不会阻止libcurl发送更多的数据,因为确切地发送什么取决于CURLOPT_READFUNCTION。

这似乎意味着curl每次只是读取到文件的末尾。

你需要做的是让传给CURLOPT_READFUNCTION的函数更智能一些,使它知道当前这一块应该在哪里停止读取。

为此,我编写了一个CurlFileChunker类,它封装了任务及其所有相关的函数和变量。

/**
 * Feeds a seekable stream to curl one fixed-size chunk at a time.
 *
 * Intended use per chunk: call getChunkInfo() to obtain the hash and length,
 * register curlReadFunction() as CURLOPT_READFUNCTION for the transfer, then
 * call next() to open the following chunk. Loop until eof() returns true.
 */
class CurlFileChunker {
    /** @var resource Stream being chunked. */
    private $fp;
    /** @var int Maximum number of bytes per chunk. */
    protected $chunkSize;
    /** @var int Current read position within the stream. */
    protected $offset;
    /** @var int Position at which the current chunk ends (exclusive). */
    protected $nextStop;

    /**
     * @param resource $fp        Open, seekable stream; chunking starts at its current position.
     * @param int      $chunkSize Chunk size in bytes, must be at least 1.
     * @throws InvalidArgumentException If $fp is not a resource or $chunkSize is below 1.
     */
    public function __construct($fp, int $chunkSize) {
        if (!is_resource($fp)) {
            throw new InvalidArgumentException('Expected an open stream resource.');
        }
        if ($chunkSize < 1) {
            throw new InvalidArgumentException('Chunk size must be a positive integer.');
        }
        $this->fp = $fp;
        $this->chunkSize = $chunkSize;
        $this->offset = ftell($fp);
        $this->nextStop = $this->offset + $this->chunkSize;
    }

    /**
     * Reads up to one chunk from the current position in a single call.
     * Kept for subclasses; getChunkInfo() streams the data instead.
     *
     * @return string|false
     */
    protected function getChunk() {
        return fread($this->fp, $this->chunkSize);
    }

    /**
     * Moves the stream pointer back to the recorded offset.
     */
    protected function reset() {
        fseek($this->fp, $this->offset);
    }

    /**
     * Tells whether any data is left to send.
     *
     * feof() alone only becomes true after a read has already run into the
     * end of the stream, so a stream whose size is an exact multiple of the
     * chunk size would otherwise produce one extra, empty chunk. A one-byte
     * probe read settles the question and is undone afterwards.
     *
     * @return bool
     */
    public function eof() {
        if (feof($this->fp)) {
            return true;
        }
        $position = ftell($this->fp);
        $probe = fread($this->fp, 1);
        fseek($this->fp, $position);

        return $probe === false || $probe === '';
    }

    /**
     * Computes the hash and byte length of the chunk starting at the current
     * offset, then rewinds so the same bytes can be sent.
     *
     * The chunk is hashed in pieces so that it never has to be held in memory
     * as a whole.
     *
     * @param string $hashMethod Algorithm name accepted by hash_init(), e.g. 'sha1'.
     * @return array{hash: string, length: int}
     */
    public function getChunkInfo(string $hashMethod) {
        $pieceSize = 1024 * 1024;
        $context = hash_init($hashMethod);
        $remaining = $this->chunkSize;
        $length = 0;

        while ($remaining > 0 && !feof($this->fp)) {
            $piece = fread($this->fp, min($remaining, $pieceSize));
            if ($piece === false || $piece === '') {
                break;
            }
            hash_update($context, $piece);
            $read = strlen($piece);
            $length += $read;
            $remaining -= $read;
        }

        $info = [
            'hash' => hash_final($context),
            'length' => $length
        ];
        $this->reset();
        return $info;
    }

    /**
     * Opens the next chunk: its end is one chunk size past the current offset.
     */
    public function next() {
        $this->nextStop = $this->offset + $this->chunkSize;
    }

    /**
     * Read callback for CURLOPT_READFUNCTION that never reads past the end of
     * the current chunk.
     *
     * @param mixed    $ch     Curl handle supplied by curl (not used).
     * @param resource $fp     Stream supplied by curl; must be the one given to the constructor.
     * @param int      $length Maximum number of bytes curl wants.
     * @return string Data for curl; an empty string signals the end of the chunk.
     * @throws Exception If $fp is not the stream this object was built with.
     */
    public function curlReadFunction($ch, $fp, int $length) {
        if ($fp !== $this->fp) {
            throw new Exception('File handle supplied differs from expected.');
        }

        // Serve whichever is smaller: what curl asked for, or what is left of the chunk.
        $wanted = min($length, $this->nextStop - $this->offset);
        if ($wanted <= 0) {
            // Chunk boundary reached (or nothing requested): empty string means EOF to curl.
            return '';
        }

        $out = fread($this->fp, $wanted);
        if ($out === false) {
            $out = '';
        }
        $this->offset = ftell($this->fp);
        return $out;
    }
}

下面是一个用法示例,模拟curl每次请求读取的字节数小于块大小的情况:

// Demo: split an in-memory stream into 10-byte chunks while the reader asks
// for 7 bytes at a time, the way curl might request less than a full chunk.
$fp = fopen('php://memory', 'w+b');
fwrite($fp, 'lorem ipsum dolor sit amet');
rewind($fp);
$c = new CurlFileChunker($fp, 10);
while (!$c->eof()) {
    $info = $c->getChunkInfo('sha1');
    var_dump($info);
    $chunk = '';
    // Compare against '' explicitly: a fragment consisting of "0" is falsy in
    // PHP and would otherwise end the loop early.
    while (($part = $c->curlReadFunction(null, $fp, 7)) !== '') {
        $chunk .= $part;
    }
    var_dump($chunk);
    // Hash of what was actually read; should equal $info['hash'].
    var_dump(sha1($chunk));
    $c->next();
}

输出:

array(2) {
["hash"]=>
string(40) "94ae3406c7e5e2ba31208dc623c20d2a107bfec2"
["length"]=>
int(10)
}
string(10) "lorem ipsu"
string(40) "94ae3406c7e5e2ba31208dc623c20d2a107bfec2"
array(2) {
["hash"]=>
string(40) "aebf816b6e13941737d5045c294ffe785ca55733"
["length"]=>
int(10)
}
string(10) "m dolor si"
string(40) "aebf816b6e13941737d5045c294ffe785ca55733"
array(2) {
["hash"]=>
string(40) "21d8e40707fa773b532ae892f82c057e92764f3a"
["length"]=>
int(6)
}
string(6) "t amet"
string(40) "21d8e40707fa773b532ae892f82c057e92764f3a"

这样,你的代码大致变成:

// Upload $local_file part by part, letting CurlFileChunker stop each transfer
// at the part boundary. $local_file, $upload_url and $large_file_auth_token
// must already be set by the caller.
$file_handle = fopen($local_file, "r");
$c = new CurlFileChunker($file_handle, 10 * 1024 * 1024);
$part_no = 1; // B2 part numbers start at 1
$sha1_of_parts = []; // collected per-part hashes, in part order
while (!$c->eof()) {
    $info = $c->getChunkInfo('sha1');
    if ($info['length'] === 0) {
        // Nothing left to send; do not upload an empty part.
        break;
    }
    $sha1_of_parts[] = $info['hash'];

    $session = curl_init($upload_url);
    $headers = array();
    $headers[] = "Accept: application/json";
    $headers[] = "Authorization: " . $large_file_auth_token;
    $headers[] = "Content-Length: " . $info['length'];
    $headers[] = "X-Bz-Part-Number: " . $part_no;
    $headers[] = "X-Bz-Content-Sha1: " . $info['hash'];
    curl_setopt($session, CURLOPT_POST, true);
    curl_setopt($session, CURLOPT_HTTPHEADER, $headers);  // Add headers
    curl_setopt($session, CURLOPT_INFILE, $file_handle);
    curl_setopt($session, CURLOPT_INFILESIZE, $info['length']);
    curl_setopt($session, CURLOPT_RETURNTRANSFER, true); // Receive server response
    curl_setopt($session, CURLOPT_READFUNCTION, [$c, 'curlReadFunction']);
    $server_output = curl_exec($session);
    curl_close($session);

    $part_no++;
    $c->next();
}
fclose($file_handle);

我在尝试重现这个问题的过程中,发现了PHP的curl实现在处理POST请求时的一个陷阱。

PHP的curl默认为chunked传输编码,B2 Native API不支持。对我来说,这引发了'missing Content-Length header'错误,但它也可能是您看到的错误的原因,具体取决于您的代码。

我的解决办法是在请求头数组中加入一个空的Transfer-Encoding头:

// Empty Transfer-Encoding header: added so the request is not sent with
// chunked transfer encoding, which the B2 Native API does not support.
$headers[] = "Transfer-Encoding:";

我将各种所需的B2本机API调用组合到一个完整的工作PHP B2多部分上传示例中,该示例从命令行运行:https://gist.github.com/metadaddy/23e75d00b21bc63576f0ba317ad43709

即使空的Transfer-Encoding头没有解决你的问题,也希望这份示例代码能帮助你继续推进。

最新更新