使用 Java 进行多线程处理:性能测试



我在Linux(Ubuntu 18.04)下有一个16核的Ryzen处理器,我想用Java线程测试它。我对性能有点失望。我的期望是并行计算要快得多,但实际上只有一秒加几毫秒的差异。

那么,我假设我的线程没有被多个内核支持,这个假设对吗?我的小测试不使用任何同步。它只是计算每个矩阵元素。是否有任何JVM优化,或者我需要一些参数来运行Java程序?

这是我的源代码:

/**
 * A fixed-size two-dimensional grid whose cells are computed from their
 * {@code {row, column}} index and a shared origin value.
 *
 * <p>The grid is filled once, inside the constructor, by several worker
 * threads that each handle an interleaved subset of the rows. After filling,
 * the cell at the centre of the grid is overwritten with the origin value.
 * {@link #toString()} renders the grid on the calling thread only.
 *
 * @param <T> type of the value stored in each cell
 */
public class Image2DInput <T> {

    /** Packs two extents into the {@code {d0, d1}} array expected by the constructor. */
    public static Integer[] dimension(Integer d0, Integer d1) {
        return new Integer[] {d0, d1};
    }

    /** Packs a row index and a column index into an {@code {i, j}} array. */
    public static Integer[] point(Integer i, Integer j) {
        return new Integer[] {i, j};
    }

    /** Returns its argument unchanged (presumably a readability aid for call sites). */
    public static <T> T origin(T origin) {
        return origin;
    }

    private final T origin;
    private final BiFunction<Integer[], T, T> delta;
    private final Object[][] matrix;

    /**
     * Allocates the grid and, when both extents are positive, fills it.
     *
     * @param dimension {@code {rows, columns}}; must not be null
     * @param origin    value passed to every {@code delta} call and stored in the centre cell; must not be null
     * @param delta     computes a cell from its {@code {row, column}} index and the origin; must not be null
     */
    public Image2DInput(
            Integer[] dimension,
            T origin,
            BiFunction<Integer[], T, T> delta)
    {
        // Fixed: the message for a null dimension used to blame "origin".
        throwIfNull(dimension, "dimension must not be null");
        throwIfNull(origin, "origin must not be null");
        throwIfNull(delta, "deltaFunction must not be null");
        this.origin = origin;
        this.delta = delta;
        this.matrix = new Object[dimension[0]][dimension[1]];
        if (dimension[0] > 0 && dimension[1] > 0) {
            setMatrix();
            setOrigin();
        }
    }

    /** Stores the origin value in the centre cell; only called for a non-empty grid. */
    private void setOrigin () {
        final int centerRow = matrix.length / 2;
        final int centerColumn = matrix[0].length / 2;
        this.matrix[centerRow][centerColumn] = origin;
    }

    /**
     * Fills the grid with one worker thread per available processor, capped at
     * the number of rows so no thread is started without any row to compute.
     * Returns only after every worker has been joined.
     */
    private void setMatrix () {
        final int workerCount =
                Math.min(Runtime.getRuntime().availableProcessors(), matrix.length);
        var workers = IntStream
                .range(0, workerCount)
                .mapToObj( index -> new RowWorker(index, workerCount) )
                .collect(Collectors.toList());
        workers.forEach(Thread::start);
        workers.forEach( worker -> join(worker) );
    }

    /** Single-threaded reference implementation of the fill, kept for comparison runs. */
    private void setSingleThreadedMatrix () {
        for (int i = 0; i < matrix.length; ++i) {
            for (int j = 0; j < matrix[i].length; ++j) {
                matrix[i][j] = delta.apply(new Integer[] {i, j}, origin);
            }
        }
    }

    /** Computes rows {@code firstRow, firstRow + stride, firstRow + 2 * stride, ...} of the grid. */
    private class RowWorker extends Thread {
        // Primitive ints: the boxed Integer fields used before made the row
        // counter a boxed value that was re-boxed on every increment.
        private final int firstRow;
        private final int stride;

        RowWorker(int firstRow, int stride) {
            this.firstRow = firstRow;
            this.stride = stride;
            setPriority(Thread.MAX_PRIORITY);
        }

        @Override
        public void run () {
            for (int i = firstRow; i < matrix.length; i += stride) {
                for (int j = 0; j < matrix[i].length; ++j) {
                    matrix[i][j] = delta.apply(new Integer[] {i, j}, origin);
                }
            }
        }
    }

    /**
     * Renders the grid as text: cells of a row separated by {@code ", "},
     * each row terminated by a newline.
     */
    @Override
    public String toString () {
        var image = new StringBuilder();
        for (int i = 0; i < matrix.length; ++i) {
            writeRow(i, image);
        }
        return image.toString();
    }

    /**
     * Appends row {@code i} followed by a newline.
     *
     * <p>Separators are written between cells instead of trimming a trailing
     * one, so a row without columns no longer triggers a negative-index
     * {@code replace}, and the terminator is a real newline rather than the
     * letter {@code n}.
     */
    private void writeRow (int i, StringBuilder image) {
        for (int j = 0; j < matrix[i].length; ++j) {
            if (j > 0) {
                image.append(", ");
            }
            image.append(at(i, j));
        }
        image.append('\n');
    }

    /** Reads the cell at row {@code i}, column {@code j}. */
    @SuppressWarnings("unchecked") // cells are only written with T values (delta results or origin) or left null
    private T at(int i, int j) {
        return (T) matrix[i][j];
    }
}

这是我的测试运行:

/**
 * Builds and renders a 10240 x 5760 complex image; the result is discarded,
 * so the test only exercises the run time of construction plus rendering.
 */
@Test
public void testPerformance () {
    final int rows = 4 * 2560;
    final int columns = 4 * 1440;
    final Integer[] size = dimension(rows, columns);
    complexImage(
            size,
            z(0.0, 0.0),
            (cell, start) -> compute(size, cell, start));
}
/**
 * Maps a grid cell to a complex number relative to the grid centre.
 *
 * <p>The real part is the column distance from the centre, the imaginary part
 * is the negated row distance, both scaled by a factor of 1.0; {@code origin}
 * is then added to the result.
 *
 * @param dim    grid extents as {@code {rows, columns}}
 * @param pt     cell index as {@code {row, column}}
 * @param origin value added to the computed offset
 * @return the complex value for the cell
 */
private Complex compute (Integer[] dim, Integer[] pt, Complex origin) {
    final double scale = 1.0;
    final Integer[] center = point(dim[0] / 2, dim[1] / 2);
    final double real = (pt[1] - center[1]) * scale;
    final double imaginary = -(pt[0] - center[0]) * scale;
    return z(real, imaginary).add(origin);
}
/**
 * Builds an {@code Image2DInput<Complex>} from the given arguments and
 * returns its textual rendering.
 *
 * @param dimension grid extents passed to the image constructor
 * @param origin    origin value passed to the image constructor
 * @param delta     cell function passed to the image constructor
 * @return the image's {@code toString()} output
 */
private String complexImage (
        Integer[] dimension,
        Complex origin,
        BiFunction<Integer[], Complex, Complex> delta)
{
    return new Image2DInput<Complex>(dimension, origin, delta).toString();
}

我已经改为FixedThreadPool。我没有看到任何区别。

/**
 * Fills the matrix using a fixed thread pool with one task per available
 * processor; task {@code k} computes rows {@code k, k + n, k + 2n, ...}
 * where {@code n} is the number of tasks.
 *
 * <p>Unlike the previous version, failures are no longer swallowed: a
 * timeout, an interruption, or an exception thrown inside a task is reported
 * instead of silently leaving the matrix partially filled. The pool is always
 * shut down before this method returns.
 *
 * @throws IllegalStateException if the work does not finish within 60
 *         seconds, the calling thread is interrupted while waiting, or a
 *         task fails
 */
private void setMatrix () {
    final int numberOfThreads = Runtime.getRuntime().availableProcessors();
    final var service = Executors.newFixedThreadPool(numberOfThreads);
    // Fully qualified so this method does not depend on additional imports.
    final var pending =
            new java.util.ArrayList<java.util.concurrent.Future<?>>(numberOfThreads);
    try {
        for (int id = 0; id < numberOfThreads; ++id) {
            final int firstRow = id;
            pending.add(service.submit( () -> {
                for (int i = firstRow; i < matrix.length; i += numberOfThreads) {
                    for (int j = 0; j < matrix[i].length; ++j) {
                        matrix[i][j] = delta.apply(new Integer[] {i, j}, origin);
                    }
                }
            } ));
        }
        service.shutdown();
        if (!service.awaitTermination(60, TimeUnit.SECONDS)) {
            throw new IllegalStateException(
                    "matrix computation did not finish within 60 seconds");
        }
        // All tasks are done here; get() only surfaces an exception a task threw.
        for (var task : pending) {
            task.get();
        }
    } catch (InterruptedException ex) {
        // Restore the interrupt flag so callers further up can observe it.
        Thread.currentThread().interrupt();
        throw new IllegalStateException("interrupted while computing the matrix", ex);
    } catch (java.util.concurrent.ExecutionException ex) {
        throw new IllegalStateException("a matrix worker failed", ex.getCause());
    } finally {
        // No-op after a clean termination; cancels leftover work on failure paths.
        service.shutdownNow();
    }
}

最新更新