碎片着色器与多个浮动操作缓慢在Xoom (tegra2)



我试图在我开发的android游戏中使用少量的噪音功能(这里是单纯的噪音)来创造"燃烧的星星"的印象。不幸的是,我不能使用3d纹理,因为它们在gles扩展和android包中没有包含它们。

我唯一的选择是计算碎片着色器中的噪声函数。下面提供的代码在HTC Desire Z和lg optimus one上运行顺畅或可接受(20-60fps)。在摩托罗拉XOOM(具有tegra2芯片组)上使用相同的程序,但是即使只显示一小部分对象,我也会获得fps的分数(1-3)。

迄今为止我们尝试过的事情:

  • 干扰精度(low - p- high),无论是在第一行指令中,还是在每次出现float/vec时分别指定

  • 注释噪声函数的部分-似乎没有任何特别的瓶颈,它将所有东西结合在一起

  • 搜索与积分、着色器浮点数等相关的问题

这是复制此行为所需的代码的一部分。请注意,在XOOM上有一些伪影,我们认为是由16位浮点运算在tegra中引起的。

precision mediump float;
#define pi 3.141592653589793238462643383279     

//
// Description : Array and textureless GLSL 2D/3D/4D simplex
// noise functions.
// Author : Ian McEwan, Ashima Arts.
// Maintainer : ijm
// Lastmod : 20110822 (ijm)
// License : Copyright (C) 2011 Ashima Arts. All rights reserved.
// Distributed under the MIT License. See LICENSE file.
// https://github.com/ashima/webgl-noise
//
vec3 mod289(vec3 x) {
  return x - floor(x * (1.0 / 289.0)) * 289.0;
}
vec4 mod289(vec4 x) {
  return x - floor(x * (1.0 / 289.0)) * 289.0;
}
vec4 permute(vec4 x) {
     return mod289(((x*34.0)+1.0)*x);
}
vec4 taylorInvSqrt(vec4 r)
{
  return 1.79284291400159 - 0.85373472095314 * r;
}
float snoise(vec3 v)
  {
  const vec2 C = vec2(1.0/6.0, 1.0/3.0) ;
  const vec4 D = vec4(0.0, 0.5, 1.0, 2.0);
// First corner
  vec3 i = floor(v + dot(v, C.yyy) );
  vec3 x0 = v - i + dot(i, C.xxx) ;
// Other corners
  vec3 g = step(x0.yzx, x0.xyz);
  vec3 l = 1.0 - g;
  vec3 i1 = min( g.xyz, l.zxy );
  vec3 i2 = max( g.xyz, l.zxy );
  // x0 = x0 - 0.0 + 0.0 * C.xxx;
  // x1 = x0 - i1 + 1.0 * C.xxx;
  // x2 = x0 - i2 + 2.0 * C.xxx;
  // x3 = x0 - 1.0 + 3.0 * C.xxx;
  vec3 x1 = x0 - i1 + C.xxx;
  vec3 x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
  vec3 x3 = x0 - D.yyy; // -1.0+3.0*C.x = -0.5 = -D.y
// Permutations
  i = mod289(i);
  vec4 p = permute( permute( permute(
             i.z + vec4(0.0, i1.z, i2.z, 1.0 ))
           + i.y + vec4(0.0, i1.y, i2.y, 1.0 ))
           + i.x + vec4(0.0, i1.x, i2.x, 1.0 ));
// Gradients: 7x7 points over a square, mapped onto an octahedron.
// The ring size 17*17 = 289 is close to a multiple of 49 (49*6 = 294)
  float n_ = 0.142857142857; // 1.0/7.0
  vec3 ns = n_ * D.wyz - D.xzx;
  vec4 j = p - 49.0 * floor(p * ns.z * ns.z); // mod(p,7*7)
  vec4 x_ = floor(j * ns.z);
  vec4 y_ = floor(j - 7.0 * x_ ); // mod(j,N)
  vec4 x = x_ *ns.x + ns.yyyy;
  vec4 y = y_ *ns.x + ns.yyyy;
  vec4 h = 1.0 - abs(x) - abs(y);
  vec4 b0 = vec4( x.xy, y.xy );
  vec4 b1 = vec4( x.zw, y.zw );
  //vec4 s0 = vec4(lessThan(b0,0.0))*2.0 - 1.0;
  //vec4 s1 = vec4(lessThan(b1,0.0))*2.0 - 1.0;
  vec4 s0 = floor(b0)*2.0 + 1.0;
  vec4 s1 = floor(b1)*2.0 + 1.0;
  vec4 sh = -step(h, vec4(0.0));
  vec4 a0 = b0.xzyw + s0.xzyw*sh.xxyy ;
  vec4 a1 = b1.xzyw + s1.xzyw*sh.zzww ;
  vec3 p0 = vec3(a0.xy,h.x);
  vec3 p1 = vec3(a0.zw,h.y);
  vec3 p2 = vec3(a1.xy,h.z);
  vec3 p3 = vec3(a1.zw,h.w);
//Normalise gradients
  vec4 norm = taylorInvSqrt(vec4(dot(p0,p0), dot(p1,p1), dot(p2, p2), dot(p3,p3)));
  p0 *= norm.x;
  p1 *= norm.y;
  p2 *= norm.z;
  p3 *= norm.w;
// Mix final noise value
  vec4 m = max(0.6 - vec4(dot(x0,x0), dot(x1,x1), dot(x2,x2), dot(x3,x3)), 0.0);
  m = m * m;
  return 42.0 * dot( m*m, vec4( dot(p0,x0), dot(p1,x1),
                                dot(p2,x2), dot(p3,x3) ) );
  }

uniform vec3 color1;
uniform vec3 color2;                        
uniform float t;
varying vec3 vTextureCoord;
void main()
{
    float t = 0.5; //mod(t, 3.0);
    float x = (vTextureCoord.x)*2.0;    
    float y = -(vTextureCoord.y)*2.0;                
    float r = sqrt(x * x + y * y);          
    gl_FragColor = vec4(0.0,0.0,0.0,0.0);   
    if(r<= 1.0){                                
        float n = snoise( vec3(vec2(x,y), Mr_T*3.3 ) );
        gl_FragColor = vec4( mix(color1,color2, abs(n) ) ,1.0);
    }
}

不久前我在tegra2中遇到了同样的问题。阅读1.3:http://www.opengl.org/wiki/GLSL_:_common_mistakes。感受痛苦。在我的例子中,fps上升了两次,但仍然很糟糕。

最新更新