Why is the output file I retrieve from S3 Glacier blank, even though it says the file is 20KB?



I am trying to create a multipart upload to store my CloudWatch Logs directly in an S3 Glacier vault. The code attached below runs without throwing any errors, but when I retrieve the output file using S3 Glacier's job retrieval mechanism, all I get back is a file that is blank (or appears to be blank), even though it is reported to occupy 20KB of storage. I am not sure what is causing this, though I suspect it has to do with my multipart upload code, so I have attached it below. The process is:

  1. First, create a buffer from uploadData. uploadData has already been imported.
  2. Calculate the number of parts. I set partSize to 32 MB.
  3. Calculate the treeHash of the entire data.
  4. Start the upload.
// Requires the AWS SDK for JavaScript v2; vaultname and dynamodbPutItem are
// defined elsewhere in this project.
const AWS = require("aws-sdk");
const glacier = new AWS.Glacier({ apiVersion: "2012-06-01" });

const multiPartUpload = async (uploadData, logGroupName) => {
  let archiveID = null;
  const archiveDescription = "Trying out multipart upload";
  const partSize = 1024 * 1024 * 32; // 32 MB per part
  const bufferUploadData = Buffer.from(uploadData);
  let numParts = Math.ceil(bufferUploadData.length / partSize);
  console.log("Number of parts ", numParts);
  const startTime = new Date();
  const params = {
    vaultName: vaultname,
    partSize: partSize.toString(),
    archiveDescription,
  };

  // Glacier validates against a SHA-256 *tree* hash, not a flat SHA-256 of the
  // whole buffer. The SDK's computeChecksums() produces the correct value; a
  // flat hash only coincides with it for payloads of 1 MB or less.
  const treeHash = glacier.computeChecksums(bufferUploadData).treeHash;
  console.log("TreeHash is ", treeHash);

  // Initiate the multipart upload.
  console.log("Initiating upload to", vaultname);
  glacier.initiateMultipartUpload(params, function (mpErr, multipart) {
    if (mpErr) {
      console.log("Error!", mpErr.stack);
      return;
    }
    console.log("Got upload ID", multipart.uploadId);

    // Grab each partSize chunk and upload it as a part.
    for (let i = 0; i < bufferUploadData.length; i += partSize) {
      const end = Math.min(i + partSize, bufferUploadData.length);
      const partParams = {
        vaultName: vaultname,
        uploadId: multipart.uploadId,
        range: "bytes " + i + "-" + (end - 1) + "/*",
        body: bufferUploadData.slice(i, end),
      };

      // Send a single part; the SDK fills in the per-part checksum itself.
      console.log("Uploading part", i, "=", partParams.range);
      glacier.uploadMultipartPart(partParams, function (multiErr, mData) {
        if (multiErr) {
          console.log("Error ", multiErr);
          return;
        }
        console.log("Completed part", mData);

        if (--numParts > 0) {
          return; // complete only when all parts have been uploaded
        }

        const doneParams = {
          vaultName: vaultname,
          uploadId: multipart.uploadId,
          archiveSize: bufferUploadData.length.toString(),
          checksum: treeHash, // the computed tree hash
        };

        console.log("Completing upload...");
        new Promise(function (myResolve, myReject) {
          glacier.completeMultipartUpload(doneParams, function (err, data) {
            if (err) {
              console.log("An error occurred while uploading the archive");
              console.log(err);
              myReject(err);
            } else {
              const delta = (new Date() - startTime) / 1000;
              console.log("Completed upload in", delta, "seconds");
              console.log("Archive ID:", data.archiveId);
              console.log("Checksum:  ", data.checksum);
              archiveID = data.archiveId;
              myResolve();
            }
          });
        })
          .then(function () {
            // Record the archive ID in DynamoDB; the log group name is of the
            // form /aws/lambda/<functionName>, so index 3 is the function name.
            const functionName = logGroupName.split("/")[3];
            return new Promise((resolve) => {
              setTimeout(() => resolve(dynamodbPutItem(archiveID, functionName)), 1000);
            });
          })
          .then(function () {
            console.log("Here in second promise and archiveID is ", archiveID);
          })
          .catch((error) => {
            console.log("Error in promises during completeMultipartUpload ", error);
          });
      });
    }
  });
};
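
One thing worth double-checking in code like this is the checksum: completeMultipartUpload validates against a SHA-256 tree hash (SHA-256 over each 1 MB chunk, then pairwise hashing of the digests up to a single root), not a flat SHA-256 of the whole buffer. The SDK's glacier.computeChecksums() computes it for you; for reference, here is a minimal sketch of the algorithm using only Node's built-in crypto module:

const crypto = require("crypto");

const MEGABYTE = 1024 * 1024;

// Sketch of Glacier's SHA-256 tree hash: hash each 1 MB chunk, then
// repeatedly hash adjacent pairs of digests until a single digest remains.
const computeTreeHash = (buffer) => {
  if (buffer.length === 0) throw new Error("Glacier does not accept empty archives");
  let level = [];
  for (let i = 0; i < buffer.length; i += MEGABYTE) {
    level.push(crypto.createHash("sha256").update(buffer.slice(i, i + MEGABYTE)).digest());
  }
  while (level.length > 1) {
    const next = [];
    for (let i = 0; i < level.length; i += 2) {
      next.push(
        i + 1 < level.length
          ? crypto.createHash("sha256").update(Buffer.concat([level[i], level[i + 1]])).digest()
          : level[i] // an odd digest is carried up to the next level unchanged
      );
    }
    level = next;
  }
  return level[0].toString("hex");
};

For a payload of 1 MB or less the tree hash degenerates to the flat SHA-256, so a small test upload can pass checksum validation even if the hashing code would be wrong for larger archives.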

Please note that there are two implementations of Amazon Glacier:

  • The original Amazon Glacier, which uses vaults and archives. It is slow and a real pain to use (the retrieval sketch after this list shows why). Best avoided.
  • The Glacier storage classes in Amazon S3. These are cheaper than the original Amazon Glacier and are used via standard S3 API calls. They are also much faster to work with.
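
To illustrate the pain: getting data back out of a vault is an asynchronous job. A rough sketch with the SDK v2 (the vault name is a placeholder, and the polling step is elided):

const AWS = require("aws-sdk");
const glacier = new AWS.Glacier({ apiVersion: "2012-06-01" });

// Vault retrieval: start a job, wait for it to finish (typically hours for
// standard retrievals), then download the job output.
const retrieveArchive = async (archiveId) => {
  const { jobId } = await glacier
    .initiateJob({
      vaultName: "my-vault", // placeholder
      jobParameters: { Type: "archive-retrieval", ArchiveId: archiveId },
    })
    .promise();
  // ...poll glacier.describeJob({ vaultName: "my-vault", jobId }) until the
  // job reports Completed...
  const output = await glacier.getJobOutput({ vaultName: "my-vault", jobId }).promise();
  return output.body; // Buffer containing the archive contents
};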

I recommend that you modify your program to use the Glacier storage classes in S3. They are cheaper and much easier to use.
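
With the storage-class approach, the whole multipart dance above collapses to a single putObject call through the standard S3 API. A minimal sketch with the SDK v2 (the bucket name and key prefix are placeholders):

const AWS = require("aws-sdk");
const s3 = new AWS.S3();

// Store the data directly in the Glacier storage class; "my-log-archive-bucket"
// and the key prefix are placeholders.
const uploadToGlacierStorageClass = async (uploadData, logGroupName) => {
  const result = await s3
    .putObject({
      Bucket: "my-log-archive-bucket",
      Key: `cloudwatch-exports${logGroupName}`, // log group names start with "/"
      Body: Buffer.from(uploadData),
      StorageClass: "GLACIER", // or "DEEP_ARCHIVE" for colder, cheaper storage
    })
    .promise();
  console.log("Stored with ETag", result.ETag);
};

Retrieval is then s3.restoreObject() followed by a normal getObject() once the restore completes, instead of Glacier's vault job mechanism.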
