如何在浏览器JavaScript中将文件流式传输到计算机和从计算机流式传输文件



我正在开发一个web应用程序(纯HTML/Javascript,没有库),它可以对文件进行字节级处理(Huffman编码演示)。它工作得很好(你不想知道它花了多长时间),但我的完成感让我有点困扰,因为我必须将文件加载到ArrayBuffer中,而不是从HDD流式传输。还有一个文件大小限制,尽管压缩4GB文件(我的数据结构支持的最大值)确实需要相当长的时间。

不过,为了让这个应用程序在低资源设备上运行,我如何从文件input框流式读取文件(我需要多次读取:频率计数、文件大小检测和实际写入),并以流的方式输出到某种浏览器下载(谢天谢地,这至少只需要一次)?

以下是目前处理它的相关函数(我为这些全局变量道歉 :p):

//Load the file
/**
 * Reads the file picked in the element with id "file" fully into memory.
 * When the read completes, the bytes are exposed through the global `inp`
 * as a DataView and boot() is called. If nothing is selected, or the read
 * fails, the problem is reported through displayError().
 */
function startProcessingFile() {
  const picker = document.getElementById("file");

  // Guard: nothing to do without a selected file.
  if (picker.files.length === 0) {
    displayError("No file selected");
    return;
  }

  displayStatus("Loading File...");
  const loader = new FileReader();
  loader.onload = () => {
    inp = new DataView(loader.result);
    boot();
  };
  loader.onerror = () => {
    displayError(loader.error);
  };
  loader.readAsArrayBuffer(picker.files[0]);
}
//A bit later on -- one of the functions that reads the data from the input file
/**
 * Counts how often each byte value occurs in the loaded input, one slice per
 * call, rescheduling itself with setTimeout until the whole input is done.
 * When finished it advances `system_state` and calls taskHandle().
 *
 * Reads the globals `inp`, `inputSize` and `die`; updates `frequencies`.
 *
 * @param {number} c Number of input bytes already processed (start offset
 *   of this slice).
 */
function countTypes(c) {
  if (die) {
    // A stop was requested: consume the flag and do not reschedule.
    die = false;
    return;
  }

  // Handle roughly 1% of the input per call so progress can be reported
  // between slices.
  let pos = c;
  const sliceEnd = Math.min(inputSize, pos + Math.ceil(inputSize / 100.0));
  for (; pos < sliceEnd; pos++) {
    frequencies[inp.getUint8(pos)] += 1;
  }

  // An empty input would otherwise compute 0/0 and report NaN progress.
  const perc = inputSize > 0 ? (100.0 * pos) / inputSize : 100;
  updateProgress(perc);

  if (pos < inputSize) {
    setTimeout(function () { countTypes(pos); }, 0);
    return;
  }

  updateProgress(100);
  system_state++;
  taskHandle();
}
//Here's where the file is read the last time and also where the bits come from that I want to save. If I could stream the data directly I could probably even get rid of the dry-run stage I currently need to count how many bytes to allocate for the output ArrayBuffer. I mean, Google Drive can download files without telling the browser the size, just whether it's done yet or not, so I'd assume that's a feature I could access here too. I'm just not sure how you actually gain access to a download from JS in the first place.
/**
 * Huffman-encodes the loaded input, one slice per call, rescheduling itself
 * with setTimeout until every byte has been emitted through writeBit().
 * When finished it advances `system_state` and calls taskHandle().
 *
 * Reads the globals `inp`, `inputSize`, `table` and `die`.
 *
 * @param {number} c Number of input bytes already processed (start offset
 *   of this slice).
 * @param {boolean} d Passed through to writeBit(); when true nothing is
 *   actually written (dry run).
 * @throws {Error} If an input byte has no entry in `table`.
 */
function encode(c, d) {
  if (die) {
    // A stop was requested: consume the flag and do not reschedule.
    die = false;
    return;
  }

  // Build the value -> code lookup once per slice instead of scanning the
  // table for every byte. Later entries overwrite earlier ones, matching the
  // original scan which kept the last match.
  const codeFor = new Map();
  for (let k = 0; k < table.length; k++) {
    codeFor.set(table[k].value, table[k].code);
  }

  // Handle roughly 0.4% of the input per call.
  let pos = c;
  const sliceEnd = Math.min(inputSize, pos + Math.ceil(inputSize / 250.0));
  for (; pos < sliceEnd; pos++) {
    const b = inp.getUint8(pos);
    const seq = codeFor.get(b);
    if (seq === undefined) {
      // Previously a missing entry silently re-emitted the preceding byte's
      // code, producing corrupt output.
      throw new Error(`No Huffman code for byte value ${b}`);
    }
    for (let j = 0; j < seq.length; j++) {
      writeBit(seq[j], d);
    }
  }

  // An empty input would otherwise compute 0/0 and report NaN progress.
  const perc = inputSize > 0 ? (100.0 * pos) / inputSize : 100;
  updateProgress(perc);

  if (pos < inputSize) {
    setTimeout(function () { encode(pos, d); }, 0);
    return;
  }

  updateProgress(100);
  system_state++;
  taskHandle();
}
//Finally, bit-level access for unaligned read/write so I can actually take advantage of the variable word size of the Huffman encoding (the read is used for decoding).
/**
 * Reads the next single bit from the global input DataView `inp` at the
 * position tracked by the globals `byte_index` / `bit_index`, then advances
 * that position by one bit. Bits are taken least-significant first.
 * The data starts 4 bytes in; per the original note, those 4 bytes hold the
 * file size int.
 *
 * @returns {number} 0 or 1.
 */
function readBit() {
  const bit = (inp.getUint8(byte_index + 4) >>> bit_index) & 1;

  // Step to the next bit, rolling over into the next byte after bit 7.
  bit_index += 1;
  if (bit_index > 7) {
    bit_index = 0;
    byte_index++;
  }

  return bit;
}
/**
 * Writes one bit at the position tracked by the globals `byte_index` /
 * `bit_index` and advances that position by one bit. The bit is merged into
 * the global `current_byte`, which is then stored into the global output
 * DataView `output` at `byte_index + 4`.
 *
 * @param {number} b Bit value to write.
 * @param {boolean} d Only when this is exactly `false` is the bit written;
 *   any other value just advances the position (dry run).
 */
function writeBit(b, d) {
  const isDryRun = d !== false;

  if (!isDryRun) {
    // Clear the target bit (keeping only the low 8 bits), then set it to b.
    const cleared = current_byte & (0xff ^ (1 << bit_index));
    current_byte = cleared | (b << bit_index);
    output.setUint8(byte_index + 4, current_byte);
  }

  // Step to the next bit, rolling over into the next byte after bit 7.
  bit_index += 1;
  if (bit_index > 7) {
    bit_index = 0;
    byte_index++;
  }
}
/**
 * Reads eight consecutive bits with readBit() and assembles them into one
 * byte, first bit read becoming the least-significant bit. Works at any bit
 * position (unaligned).
 *
 * @returns {number} The assembled byte value.
 */
function readByte() {
  let value = 0;
  for (let shift = 0; shift < 8; shift += 1) {
    value |= readBit() << shift;
  }
  return value;
}
/**
 * Writes the low eight bits of a value with writeBit(), least-significant
 * bit first. Works at any bit position (unaligned).
 *
 * @param {number} b Value whose low 8 bits are written.
 * @param {boolean} d Passed through unchanged to writeBit() (dry-run flag).
 */
function writeByte(b, d) {
  for (let shift = 0; shift < 8; shift += 1) {
    writeBit((b >>> shift) & 1, d);
  }
}
//And finally the download mechanism I'm using.
/**
 * Offers the processed data in the global `output` as a browser download.
 *
 * The download name is the base name of the global `source_name`; when
 * encoding (`doEncode` truthy) the extension `fext` is appended, otherwise a
 * trailing `fext` is removed. Afterwards the global `running` is cleared and,
 * if the element with id "ac" has the class "activeNav", clearRes() is called.
 */
function downloadResult() {
  const blobObject = new Blob([output], { type: 'application/octet-stream' });

  // Keep only the file name: drop anything up to the last "\" or "/".
  let n = source_name.split('\\').pop().split('/').pop();
  if (doEncode) {
    n = n + fext;
  } else if (n.endsWith(fext)) {
    // Strip the extension only at the end of the name; replace() would have
    // removed the first occurrence anywhere in it.
    n = n.slice(0, n.length - fext.length);
  }

  const url = URL.createObjectURL(blobObject);
  const a = document.createElement("a");
  a.setAttribute("href", url);
  a.setAttribute("download", n);
  a.click();

  // Release the blob URL so the data can be freed, but only after a delay so
  // the browser has had time to start the download.
  const revokeDelayMs = 40000;
  setTimeout(function () { URL.revokeObjectURL(url); }, revokeDelayMs);

  running = false;
  const b = document.getElementById("ac");
  if (b.classList.contains("activeNav")) {
    clearRes();
  }
}

我基本上想把其中的大部分内容撕下来,换成可以从用户选择的文件中读取字节或中等大小的数据块的东西,然后当它进入实际输出阶段时,通过或多或少普通的下载将数据逐字节地滴到他们的下载文件夹中。

我确实知道在一个文件输入框中可以选择多个文件,所以如果可以下载到子文件夹,我可以想出如何在浏览器中制作文件存档程序。这不是很有趣吗……请注意,我很确定这是不可能的(我不明白为什么你不能从网页在浏览器下载文件夹中创建一个子目录,但可能有安全原因)。

如果你需要查看更多代码,请告诉我,但由于这是一个课堂项目,我不想被指控抄袭我自己的应用程序。。。

以流形式从磁盘读取

您可以使用Blob.stream()方法,该方法从Blob(或File)返回一个ReadableStream。

// When a file is picked, read it as a stream and hand each chunk to
// handleChunk() in arrival order; logs "all done" after the last chunk.
inp.onchange = async (evt) => {
  const reader = inp.files[0].stream().getReader();

  // Each read() resolves to { done, value }; stop as soon as done is set.
  for (let step = await reader.read(); !step.done; step = await reader.read()) {
    handleChunk(step.value);
  }

  console.log("all done");
};
/**
 * Receives one chunk of file data and logs its size in bytes.
 *
 * @param {{byteLength: number}} buf The chunk that was read.
 */
function handleChunk(buf) {
  const { byteLength } = buf;
  console.log("received a new buffer", byteLength);
}
<!-- File picker. The script refers to this element as `inp` without declaring it; presumably it relies on the browser exposing elements with an id as globals (verify). -->
<input type="file" id="inp">

对于不支持此方法的旧浏览器,您仍然可以仅使用其.slice()方法按块读取文件:

// When a file is picked, read it in fixed-size pieces using Blob.slice() and
// hand each piece to handleChunk() in order; logs "all done" at the end.
inp.onchange = async (evt) => {
  const file = inp.files[0];
  const chunksize = 64 * 1024;

  for (let offset = 0; offset < file.size; offset += chunksize) {
    // slice() returns a Blob synchronously, so it needs no await; an end
    // offset past the file size just yields a shorter final piece.
    const chunkfile = file.slice(offset, offset + chunksize);
    // Blob.arrayBuffer() can be polyfilled with a FileReader
    const chunk = await chunkfile.arrayBuffer();
    handleChunk(chunk);
  }

  console.log("all done");
};
/**
 * Receives one piece of the file and logs how many bytes it contains.
 *
 * @param {{byteLength: number}} buf The piece that was read.
 */
function handleChunk(buf) {
  const size = buf.byteLength;
  console.log("received a new buffer", size);
}
<!-- File picker. The script refers to this element as `inp` without declaring it; presumably it relies on the browser exposing elements with an id as globals (verify). -->
<input type="file" id="inp">


然而,将数据流写入磁盘有点困难

Jimmy Wärting有一个很棒的hack,名为StreamSaver.js,它使用Service Workers。我不确定它的浏览器支持情况如何,虽然很棒,但它仍然是一个"hack",并且需要Service Worker才能运行。

一种更简单的方法是使用正在定义的文件系统API,该系统目前仅在Chrome中可用。您可以看到这个Q/A的代码示例。

现代浏览器在JavaScript中已经支持了Streams API

Mozilla Streams MDN和示例

// Set up your stream with the options; they help handle size limitations etc.
// The second argument, queuingStrategy, is optional and may be left out
// (the original wrote it in reference-style bracket notation, which is not
// valid JavaScript).
var readableStream = new ReadableStream(underlyingSource, queuingStrategy);

// Fetch a page and re-expose its body as a new ReadableStream wrapped in a
// Response, forwarding every chunk as it arrives.
fetch("https://www.example.org/")
  .then((response) => {
    const reader = response.body.getReader();
    const stream = new ReadableStream({
      start(controller) {
        // Forwards one chunk per call and re-invokes itself until the source
        // is drained.
        function push() {
          // "done" is a Boolean and value a "Uint8Array"
          reader
            .read()
            .then(({ done, value }) => {
              // Is there no more data to read?
              if (done) {
                // Tell the browser that we have finished sending data
                controller.close();
                return;
              }
              // Get the data and send it to the browser via the controller
              controller.enqueue(value);
              push();
            })
            .catch((err) => {
              // Without this a failed read would leave the consumer of the
              // new stream waiting forever.
              controller.error(err);
            });
        }

        push();
      },
      cancel(reason) {
        // If the consumer gives up, stop reading from the source as well.
        return reader.cancel(reason);
      },
    });
    return new Response(stream, { headers: { "Content-Type": "text/html" } });
  })
  .catch((err) => {
    // Request-level failures (network error, missing body) end up here.
    console.error("Streaming fetch failed:", err);
  });

相关内容

最新更新