如何保存/加载张量以加快训练



我想知道在 tensorflow.js 中是否可以保存和加载张量,以免每个批次都重新计算它们?问题是我的 GPU 几乎没有被利用,因为它必须等待 CPU 在训练之前把我的数组转换成张量。

我现在看起来像这样:

  1. 加载数据集(从硬盘读取到数组)(1-2秒)

  2. CPU 将数组转换为张量(花费很长时间)

  3. GPU 训练(需要 1 秒或更少)

  4. 释放/清理张量(5秒,也有点太长)

  5. 重复

编辑:这是相关代码。有问题的行(即耗时很长的计算)和没有问题的行都已用注释标明:

/**
 * Trains the module-level `model` for epochs ep+1 .. 1200.
 * Each epoch visits the module-level `in_tensor_sum` / `out_tensor_sum`
 * sample arrays in a freshly shuffled order, grouped into mini-batches of
 * up to 250 samples. Batch tensors are created on the fly (the CPU-heavy
 * step) and disposed right after `model.fit` to avoid GPU memory leaks.
 *
 * @param {number} ep - last completed epoch; training resumes at ep + 1.
 * @returns {Promise<void>}
 */
async function learn_on(ep) {
    for (let learn_ep = ep + 1; learn_ep <= 1200; learn_ep++) {
        let batchStart = 0;
        let miniBatchIn = [];
        let miniBatchOut = [];

        // Shuffled index array so each epoch sees the samples in random order.
        const shuffleArr = F_shuffle_array(
            Array.from({ length: in_tensor_sum.length }, (_, i) => i)
        );

        for (let batchNum = batchStart; batchNum < in_tensor_sum.length; batchNum++) {
            miniBatchIn.push(in_tensor_sum[shuffleArr[batchNum]]);
            miniBatchOut.push(out_tensor_sum[shuffleArr[batchNum]]);

            const batchIsFull = batchNum + 1 === batchStart + 250;
            const isLastSample = batchNum === in_tensor_sum.length - 1;
            if (batchIsFull || isLastSample) {
                // CPU-heavy step: converting the plain JS arrays into tensors
                // dominates wall time (9600 input units vs 400 outputs).
                const xs = tf.tensor(miniBatchIn);
                const ys = tf.tensor(miniBatchOut);

                // GPU-accelerated training on this mini-batch.
                await model.fit(xs, ys, {
                    epochs: 1,
                    shuffle: true,
                    callbacks: {
                        onEpochEnd: async (epoch, log) => {
                            console.log(`${batchNum}:|Epoch ${learn_ep}: | set: ${batchNum / in_tensor_sum.length} | loss = ${log.loss}`);
                        },
                    },
                });

                // Release the batch tensors explicitly. NOTE: the original
                // `await tf.tidy(() => { tf.tensor([xs, ys]); ... })` block was
                // removed — it was a no-op that only created and immediately
                // discarded a 2-element NaN tensor (tf.tidy is synchronous and
                // frees nothing here); dispose() is what actually reclaims
                // the memory.
                xs.dispose();
                ys.dispose();
                console.log('numTensors (after dispose): ' + tf.memory().numTensors);

                batchStart = batchNum + 1;
                miniBatchIn = [];
                miniBatchOut = [];
            }
        }
    }
}

编辑2:

我现在尝试使用'tfjs-npy'来保存和加载张量。但是我有一个错误:

.
.
.
var xs = await tf.tensor(mini_batch_in);
var ys = await tf.tensor(mini_batch_out);
var fs = require('fs');            
var tf_parser= require  ('tfjs-npy');

var writeTO=await tf_parser.serialize(ys);
await fs.writeFileSync('/home/test/NetBeansProjects/ispeed_tensload/save_tensors/test.js',new Buffer(writeTO));
var tensor_data =await fs.readFileSync("/home/test/NetBeansProjects/ispeed_tensload/save_tensors/test.js");
var my_arrayBuffer = new Uint8Array(tensor_data).buffer;
var ys2=await tf_parser.parse(my_arrayBuffer);

await model.fit(xs, ys2, {....

错误:

(node:26576) UnhandledPromiseRejectionWarning: TypeError: Cannot read property 'values' of undefined
    at NodeJSKernelBackend.getInputTensorIds (/home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:142:26)
    at NodeJSKernelBackend.executeSingleOutput (/home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:186:73)
    at NodeJSKernelBackend.gather (/home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-node/dist/nodejs_kernel_backend.js:965:21)
    at environment_1.ENV.engine.runKernel.$x (/home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-core/dist/ops/segment_ops.js:56:84)
    at /home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-core/dist/engine.js:129:26
    at Engine.scopedRun (/home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-core/dist/engine.js:101:23)
    at Engine.runKernel (/home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-core/dist/engine.js:127:14)
    at gather_ (/home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-core/dist/ops/segment_ops.js:56:38)
    at Object.gather (/home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-core/dist/ops/operation.js:23:29)
    at /home/test/NetBeansProjects/ispeed_tensload/node_modules/@tensorflow/tfjs-layers/dist/backend/tfjs_backend.js:275:20

我猜是 "tfjs-npy" 产生的格式不匹配,但我并不确定。另一个可以接受的解决方案是:在 GPU 训练的同时,让张量创建过程在多个线程上运行(用 C++ 后端优化),从而把空闲时间降到最低。但我不知道这是否可行。目前张量创建过程只在 node.js 进程中单线程运行,性能非常弱。

node.js 使用的内存可以用标志 --max-old-space-size 来增加。node.js 和 tensorflow.js 本身都没有问题,唯一的限制可能是你的内存容量,这也可能是你需要来回分批读取数据的唯一原因。

话虽如此,目前尚不清楚这里正在做什么:

 await tf.tidy(() => {
                    tf.tensor([xs, ys]);
                    console.log('numTensors (inside tidy): ' + tf.memory().numTensors);
                });

这是没有用的,因为:

  • 该张量在 tidy 内被创建后随即就被丢弃了。

  • `xs` 和 `ys` 并不是普通数组,所以 tf.tensor([xs, ys]) 只会创建一个包含 2 个 NaN 值的张量,它对代码的性能没有任何影响。

张量 `xs` 和 `ys` 已经分别通过 xs.dispose() 和 ys.dispose() 被正确地删除了。

最新更新