从磁盘获取文件的不同读取速度



我正在开发一个Java应用程序,用于从磁盘读取多个文本文件,修改它们并将修改后的文件写回磁盘。当我使用 http://sourceforge.net/projects/filereadtest/files/filereadtest-java-gui/中的"FileReadTestJavaGui-1.05.jar"运行读取测试时,我的读取速度超过 250 MB/秒。当我运行下面给出的测试应用程序时,我的读取速度低至 11.75 MB/秒。我正在使用ThreadPoolExecutor,无法获得一致的读/写速度。非常欢迎解决此问题的任何建议。我在一台具有 12 GB RAM 的戴尔双四核 2.4 GHz 机器上的 Windows 2008 Server R2 机器上运行了这些测试。

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
public class RunTest {
    private static String baseFileName = "C:/logs/input_";
    private static class FileReaderRunnable implements Runnable {
        public FileReaderRunnable(int threadID) {
            super();
            this.threadID = threadID;
        }
        int threadID = -1;
        @Override
        public void run() {
            Thread.currentThread().setName("Thread-" + threadID);
            String writeBaseFileName = "c:/temp/output";
            String[] args = {baseFileName + threadID + ".log", writeBaseFileName + threadID + ".txt"};
            readAndWriteFileInputOutputStream(args);
        }
    }
    public static void main(String[] args) {
        int MAX_THREADS = 20;
        int coreThreadPoolSize = 2;
        int maxThreadPoolSize = 4;
        long threadKeepAliveTime = 12000000;
        int arrayBlockQueueSize = 400;
        ThreadPoolExecutor executor = new ThreadPoolExecutor(
                coreThreadPoolSize,
                maxThreadPoolSize,
                threadKeepAliveTime,
                TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<Runnable>(arrayBlockQueueSize, true));
        for (int i = 0; i < MAX_THREADS; i++) {
            FileReaderRunnable frr = new FileReaderRunnable(i+1);
            executor.execute(frr);
        }
        executor.shutdown();
        while (!executor.isTerminated()) {
            try {
                Thread.sleep(5000);
                print("********** executor.getPoolSize()=" + executor.getPoolSize());
                print("********** executor.getCorePoolSize()=" + executor.getCorePoolSize());
                print("********** executor.getActiveCount()=" + executor.getActiveCount());
                print("********** executor.getCompletedTaskCount()=" + executor.getCompletedTaskCount());
                print("********** executor.getTaskCount()=" + executor.getTaskCount());
                print("********** executor.isShutdown()=" + executor.isShutdown());
                print("********** executor.isTerminated()=" + executor.isTerminated());
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    public static void readAndWriteFileInputOutputStream(String[] args) {
        try {
            String routineName = Thread.currentThread().getName();
            String fInp = "c:\temp\input.txt";
            String fOut = "c:\temp\output.txt";
            if( args.length > 0 ) {
                fInp = args[0];
            }
            if( args.length > 1 ) {
                fOut = args[1];
            }
            FileInputStream fis = null;
            FileOutputStream fos = null;
            // delete the file if it exists
            File fff = new File( fOut );
            if( fff.exists() ) {
                fff.delete();
            }
            fff = null;
            fos = new FileOutputStream(fOut);
            fis = new FileInputStream( fInp );
            long readTime = 0;
            long writeTime = 0;
            final int FILE_CHUNK_SIZE = 16 * 1024;
            byte[] buffer = new byte[ FILE_CHUNK_SIZE ];
            long sTOverall = System.nanoTime();
            final long fileLen = fis.available();
            for ( long restSize = fileLen; restSize > 0; ) {
                final int sizeToProcess = (int) java.lang.Math.min( restSize, FILE_CHUNK_SIZE );
                long sT = System.nanoTime();
                final int actuallyRead = fis.read( buffer, 0, sizeToProcess );
                readTime += System.nanoTime() - sT;
                restSize -= actuallyRead;
                long sT0 = System.nanoTime();
                fos.write(buffer);
                writeTime += System.nanoTime() - sT0;
            }
            sTOverall = System.nanoTime() - sTOverall;
            fff = new File(fOut);
            double size = ((double)fff.length()) / (1024 * 1024);
            double secTaken = ((double)writeTime) / 1000000000;
            print(routineName + ": Took: " + secTaken + " sec to write " + size + " MB which is " + (size/secTaken) + " MB/sec");
            fos.close();
            File fff2 = new File(fInp);
            size = ((double)fff2.length()) / (1024 * 1024);
            secTaken = ((double)readTime) / 1000000000;
            print(routineName + ": Took: " + secTaken + " sec to read " + size + " MB which is " + (size/secTaken) + " MB/sec");
            fis.close();
            print(routineName + ": Took " + sTOverall + " nanosec overall");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    public static void print(String string) {
        System.out.println(string);
    }
}

我得到的结果如下:

Thread-1: Took: 0.171561487 sec to write 100.015625 MB which is 582.9724768006936 MB/sec
Thread-2: Took: 0.174120658 sec to write 100.015625 MB which is 574.4041295777781 MB/sec
Thread-1: Took: 0.095193221 sec to read 100.00039100646973 MB which is 1050.4990791988196 MB/sec
Thread-1: Took 273077435 nanosec overall
Thread-2: Took: 0.095017056 sec to read 100.00039100646973 MB which is 1052.4467418404304 MB/sec
Thread-2: Took 273701058 nanosec overall
Thread-4: Took: 0.14674968 sec to write 100.015625 MB which is 681.538964854983 MB/sec
Thread-4: Took: 0.089255036 sec to read 100.00039100646973 MB which is 1120.3893414649424 MB/sec
Thread-4: Took 238318510 nanosec overall
Thread-3: Took: 0.151534826 sec to write 100.015625 MB which is 660.0174206818965 MB/sec
Thread-3: Took: 0.093351422 sec to read 100.00039100646973 MB which is 1071.2251496979845 MB/sec
Thread-3: Took 246756663 nanosec overall
Thread-5: Took: 0.149539881 sec to write 100.015625 MB which is 668.8224193517981 MB/sec
Thread-5: Took: 0.089968802 sec to read 100.00039100646973 MB which is 1111.5007511878364 MB/sec
Thread-5: Took 239927158 nanosec overall
Thread-6: Took: 0.153380444 sec to write 100.015625 MB which is 652.0754692821205 MB/sec
Thread-6: Took: 0.087988256 sec to read 100.00039100646973 MB which is 1136.519753346057 MB/sec
Thread-6: Took 241764966 nanosec overall
Thread-7: Took: 0.142689584 sec to write 100.015625 MB which is 700.9315059745355 MB/sec
Thread-7: Took: 0.088074798 sec to read 100.00039100646973 MB which is 1135.4030128626548 MB/sec
Thread-7: Took 231188741 nanosec overall
Thread-8: Took: 0.140693664 sec to write 100.015625 MB which is 710.8751180152648 MB/sec
Thread-8: Took: 0.085812695 sec to read 100.00039100646973 MB which is 1165.333299536505 MB/sec
Thread-8: Took 226884069 nanosec overall
Thread-9: Took: 0.105057511 sec to write 100.015625 MB which is 952.0083242786895 MB/sec
Thread-9: Took: 0.076369586 sec to read 100.00039100646973 MB which is 1309.4269099019305 MB/sec
Thread-9: Took 181834692 nanosec overall
Thread-10: Took: 0.113160569 sec to write 100.015625 MB which is 883.8381238609713 MB/sec
Thread-10: Took: 0.083640607 sec to read 100.00039100646973 MB which is 1195.5961893780818 MB/sec
Thread-10: Took 197200174 nanosec overall
********** executor.getPoolSize()=2
********** executor.getCorePoolSize()=2
********** executor.getActiveCount()=2
********** executor.getCompletedTaskCount()=10
********** executor.getTaskCount()=20
********** executor.isShutdown()=true
********** executor.isTerminated()=false
Thread-11: Took: 0.154032991 sec to write 100.015625 MB which is 649.3130098343672 MB/sec
Thread-11: Took: 8.420174029 sec to read 100.00039100646973 MB which is 11.876285533061127 MB/sec
Thread-11: Took 8574829737 nanosec overall
********** executor.getPoolSize()=2
********** executor.getCorePoolSize()=2
********** executor.getActiveCount()=2
********** executor.getCompletedTaskCount()=11
********** executor.getTaskCount()=20
********** executor.isShutdown()=true
********** executor.isTerminated()=false
Thread-12: Took: 0.140394781 sec to write 100.015625 MB which is 712.3884825889646 MB/sec
Thread-12: Took: 8.505642503 sec to read 100.00039100646973 MB which is 11.756947340685771 MB/sec
Thread-12: Took 8646511924 nanosec overall
Thread-13: Took: 0.143740254 sec to write 100.015625 MB which is 695.8080441405091 MB/sec
Thread-13: Took: 4.546878135 sec to read 100.00039100646973 MB which is 21.993197978346462 MB/sec
Thread-13: Took 4691105248 nanosec overall
********** executor.getPoolSize()=2
********** executor.getCorePoolSize()=2
********** executor.getActiveCount()=2
********** executor.getCompletedTaskCount()=13
********** executor.getTaskCount()=20
********** executor.isShutdown()=true
********** executor.isTerminated()=false
Thread-14: Took: 0.143977148 sec to write 100.015625 MB which is 694.6631905779936 MB/sec
Thread-14: Took: 5.020245761 sec to read 100.00039100646973 MB which is 19.91942143217911 MB/sec
Thread-14: Took 5164713374 nanosec overall
Thread-15: Took: 0.155099121 sec to write 100.015625 MB which is 644.8497216177002 MB/sec
Thread-15: Took: 3.628340768 sec to read 100.00039100646973 MB which is 27.560914864562612 MB/sec
Thread-15: Took 3783981095 nanosec overall
Thread-16: Took: 0.147386899 sec to write 100.015625 MB which is 678.5923693258518 MB/sec
Thread-16: Took: 3.865473212 sec to read 100.00039100646973 MB which is 25.870154964734425 MB/sec
Thread-16: Took 4013349692 nanosec overall
********** executor.getPoolSize()=2
********** executor.getCorePoolSize()=2
********** executor.getActiveCount()=2
********** executor.getCompletedTaskCount()=16
********** executor.getTaskCount()=20
********** executor.isShutdown()=true
********** executor.isTerminated()=false
Thread-17: Took: 0.145950813 sec to write 100.015625 MB which is 685.2693927782368 MB/sec
Thread-17: Took: 3.594029782 sec to read 100.00039100646973 MB which is 27.824029591324553 MB/sec
Thread-17: Took 3740485565 nanosec overall
Thread-18: Took: 0.143193164 sec to write 100.015625 MB which is 698.4664784696006 MB/sec
Thread-18: Took: 4.387502482 sec to read 100.00039100646973 MB which is 22.792099016861528 MB/sec
Thread-18: Took 4531185662 nanosec overall
********** executor.getPoolSize()=2
********** executor.getCorePoolSize()=2
********** executor.getActiveCount()=2
********** executor.getCompletedTaskCount()=18
********** executor.getTaskCount()=20
********** executor.isShutdown()=true
********** executor.isTerminated()=false
Thread-19: Took: 0.14751012 sec to write 100.015625 MB which is 678.0255144528389 MB/sec
Thread-19: Took: 5.882018447 sec to read 100.00039100646973 MB which is 17.001033218022773 MB/sec
Thread-19: Took 6030072471 nanosec overall
Thread-20: Took: 0.13555387 sec to write 100.015625 MB which is 737.8293589109629 MB/sec
Thread-20: Took: 4.122019495 sec to read 100.00039100646973 MB which is 24.260048048722226 MB/sec
Thread-20: Took 4258033501 nanosec overall
********** executor.getPoolSize()=0
********** executor.getCorePoolSize()=2
********** executor.getActiveCount()=0
********** executor.getCompletedTaskCount()=20
********** executor.getTaskCount()=20
********** executor.isShutdown()=true
********** executor.isTerminated()=true

您的写入时间对于所有运行都是一致的。 您的读取时间开始非常快(> 1 GB/s),但最终会触底。 我怀疑正在发生的事情是您超出了系统的磁盘缓存容量。 对于前 10 个线程,您写入的数据会卡在系统的磁盘缓存中,因此读取速度非常快。 在第 10 个线程之后,您已经写入了足够的数据,系统开始丢弃缓存的数据,实际上必须转到磁盘才能读取数据。 它应该保留 MRU 块,但也许不是。 您没有指定什么操作系统,因此甚至无法推测正在发生的事情。

这是一个警示故事,即微基准充满了危险。 在现代操作系统上有很多优化正在进行,例如 JIT 编译、磁盘缓存(在操作系统、磁盘控制器和磁盘本身中)等,您在基准测试时必须考虑这些因素,尤其是 Java 代码执行速度和 I/O 等基本操作。

我怀疑FileReadTestJavaGui没有写入足够的数据来遇到同样的问题。

最重要的是,如果不深入了解代码和"裸机"之间发生的一切,你就无法获得一致的结果,甚至无法确切地知道你正在测量什么

我怀疑您没有使用直接I/O来避免在RAM中缓存数据(在Java中可以吗?如果您使用的是Linux,则可能需要运行vmstat以查看正在发生的事情,或者如果Windows,则运行Perfmon。我目前正在Raspberry Pi上编写基准测试和压力程序。以下是磁盘(SD 卡)测试的 vmstat 结果,首先是直接 I/O,其中文件缓存保持较小且 CPU 利用率较低。第二个是缓存 I/O,其中缓存可以包含所有使用的文件,并且从 RAM 读取数据,CPU 利用率为 100%。RPi 只有 512 MB 相对较慢的 RAM。对 4 x 164 MB 文件的进一步测试不会缓存它们以供读取,但当操作系统意识到正在重新读取文件时,重复运行可能会有一些缓存。无论文件大小如何,缓存速度都会有很大差异。

vmstat 15 25 > vmstats44.txt
15 second periods, 25 samples
Write 4 files of 82 MB in 46 seconds =   7.1 MB/sec bo
Read 3 x 4 x 82 MB in 62 second       = 15.8 MB/sec bi
Reread 49200 x 64 KB in 167 seconds   = 18.4 MB/sec bi
Rereads 64 KB block for 1 second from 
164 blocks in one file
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 1  0      0 349476   2772  33484    0    0    51   439 1264  229 15 30 53  2
 0  1      0 347428   2796  33532    0    0     2  6996 3793  914 17 42  0 41
 1  1      0 347172   2904  33592    0    0     6  6851 3736  849 11 41  0 48
 1  1      0 347172   2932  33616    0    0     1  7536 3996  931 11 46  0 43
 2  0      0 347048   2948  33616    0    0 13965  1053 6187 1264 14 18  0 68
 0  1      0 347048   2964  33616    0    0 16188     3 6524 1314 13 14  0 73
 1  0      0 347048   2972  33616    0    0 16427     3 6605 1333 12 14  0 74
 0  1      0 347048   2980  33616    0    0 16439     2 6607 1335 13 14  0 73
 2  0      0 347048   2996  33616    0    0 18253     3 7198 1471  7  9  0 84
 1  1      0 347048   3020  33616    0    0 18842     5 7393 1510  4 10  0 87
 0  1      0 346924   3044  33616    0    0 18833     6 7389 1512  2 10  0 88
 0  1      0 346924   3068  33616    0    0 18842     5 7393 1504  4 10  0 87
 1  0      0 346924   3092  33616    0    0 18859     6 7398 1509  3  8  0 89
 0  1      0 346924   3116  33616    0    0 18854     5 7390 1509  3  9  0 88
 0  1      0 346800   3140  33616    0    0 18859     5 7395 1508  4  9  0 87
 1  1      0 346800   3164  33616    0    0 18820     6 7385 1501  3 10  0 87
 1  1      0 346800   3188  33616    0    0 18842     5 7391 1506  3  8  0 89
 1  1      0 346800   3212  33616    0    0 18829     5 7386 1507  3  9  0 88
 0  1      0 346676   3236  33616    0    0 18816     6 7384 1495  4  9  0 88
 0  0      0 346800   3252  33552    0    0  7428     7 3632  640  4  4 59 32

Cached test  - see cache size details
Write 4 x 82 MB time 2.3, 9.1, 12.3, 2.4 seconds average 12.6 MB/sec bo
Read 19 x 4 x 82 MB in 63 seconds                       =  99 MB/sec cpu us sy
Reread 1158120 x 64 KB in 164 seconds                   = 441 MB/sec cpu us sy
procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa
 2  0      0 355484   3692  29308    0    0    60   345 1260  204 14 26 58  2
 0  3      0 124856   3976 254388    0    0    27 13562 1661 1889 16 34  0 50
 1  1      0  11476   4000 365356    0    0     0  8850 1413 2556 10 23  0 67
 1  0      0  11476   4012 365420    0    0     0     2 1750 1118 48 52  0  0
 1  0      0  11344   4020 365420    0    0     0     2 1270  122 50 50  0  0
 1  0      0  11352   4028 365420    0    0     0     4 1271   61 46 54  0  0
 1  1      0  11600   3616 365572    0    0    57     2 1292   77 44 56  0  0
 1  0      0  12320   1940 366340    0    0   254     6 1376  111 14 86  0  0
 1  0      0  11476   1964 367540    0    0    80     4 1304   91 14 86  0  0
 1  0      0  11352   1988 367540    0    0     0     5 1277   82 12 88  0  0
 1  0      0  11352   2012 367540    0    0     0     5 1275   79 13 87  0  0
 1  0      0  11228   2036 367540    0    0     0     5 1276   85 14 86  0  0
 2  0      0  11228   2060 367540    0    0     0     4 1276   80 13 87  0  0
 2  0      0  11228   2084 367540    0    0     0     5 1276   77 13 87  0  0
 1  0      0  11228   2108 367540    0    0     0     4 1276   83 13 87  0  0
 1  0      0  11104   2132 367540    0    0     0     4 1276   78 12 88  0  0
 1  0      0  10980   2300 367552    0    0     9     9 1282   80 11 89  0  0
 0  0      0 352104   2324  31696    0    0     1     5 1273   90 12 81  7  0

最新更新