opengl - Voxel rendering optimization with geometry shader -


I am programming a small voxel game, a little bit like Minecraft, in C++ with SDL2, GLEW, and OpenGL 4.1.

I am trying to optimize the voxel rendering as much as I can.

I slice the world into chunks, and the chunks into blocks.

Each chunk contains 16x16x16 blocks.

Right now, if I edit a chunk (remove or place a block), I rebuild the complete chunk and the neighbor chunk, and upload the VAOs and VBOs to the graphics card.

Now, to minimize the vertex data I have to transfer from the CPU to the GPU, I want to use geometry shaders.

First of all, is this a good idea?

I mean, every frame the geometry shader has to calculate a primitive for each voxel face.

However, I programmed the vertex shader so that I only have to pass one vertex for each block face.

To make this possible, I used a vec4.

The first three elements (x, y, z) are used for the block position, and the fourth element (w) is used to indicate in which direction the face is pointing.

0 means back, 1 means front, 2 means left, 3 means right, 4 means bottom, 5 means top.

Please ignore the UVs and normals for now.

Furthermore, I upload GLbytes instead of GLfloats.

Is this a good idea?

What would be a better/faster way?

    #version 410

    // Geometry shader: expands one input point per visible voxel face into a
    // 4-vertex triangle strip (one quad).
    //
    // Input point layout (gl_in[0].gl_Position):
    //   xyz = position of the block's minimum corner
    //   w   = face id: 0 back, 1 front, 2 left, 3 right, 4 bottom,
    //         any other value is treated as top.

    uniform mat4 un_combined;

    layout(points) in;
    layout(triangle_strip, max_vertices = 4) out;

    // Kept so the stage interface is unchanged; not read yet.
    in vec2 ge_uv[];

    out vec2 fr_uv;
    out vec3 fr_normal;

    // Corner offsets relative to the block position, four per face, listed in
    // triangle-strip emission order. Indexed as FACE_CORNERS[face * 4 + i].
    const vec3 FACE_CORNERS[24] = vec3[24](
        // 0: back   (z = 0)
        vec3(0, 0, 0), vec3(0, 1, 0), vec3(1, 0, 0), vec3(1, 1, 0),
        // 1: front  (z = 1)
        vec3(1, 0, 1), vec3(1, 1, 1), vec3(0, 0, 1), vec3(0, 1, 1),
        // 2: left   (x = 0)
        vec3(0, 0, 1), vec3(0, 1, 1), vec3(0, 0, 0), vec3(0, 1, 0),
        // 3: right  (x = 1)
        vec3(1, 0, 0), vec3(1, 1, 0), vec3(1, 0, 1), vec3(1, 1, 1),
        // 4: bottom (y = 0)
        vec3(1, 0, 0), vec3(1, 0, 1), vec3(0, 0, 0), vec3(0, 0, 1),
        // 5: top    (y = 1)
        vec3(0, 1, 1), vec3(1, 1, 1), vec3(0, 1, 0), vec3(1, 1, 0)
    );

    // One outward normal per face; every vertex of a face gets the same one.
    const vec3 FACE_NORMALS[6] = vec3[6](
        vec3( 0,  0, -1),   // back
        vec3( 0,  0,  1),   // front
        vec3(-1,  0,  0),   // left
        vec3( 1,  0,  0),   // right
        vec3( 0, -1,  0),   // bottom
        vec3( 0,  1,  0)    // top
    );

    void main()
    {
        vec4 o = gl_in[0].gl_Position;

        // Exact ids 0..4 select their face; everything else falls back to
        // "top", matching the final else of the original if/else chain.
        int face = int(o.w);
        if (face < 0 || face > 4 || float(face) != o.w)
            face = 5;

        for (int i = 0; i < 4; ++i)
        {
            gl_Position = un_combined * vec4(o.xyz + FACE_CORNERS[face * 4 + i], 1.0);
            fr_uv = vec2(0.0, 0.0);             // placeholder until UVs are wired up
            fr_normal = FACE_NORMALS[face];
            EmitVertex();
        }

        EndPrimitive();
    }

You can test it yourself, but in general, geometry shaders slow things down rather than speed things up. As I understand it, AMD GPUs have a special case for geometry shaders that always output exactly 4 vertices (which is true in your case), and Intel GPUs can have fast geometry shaders relative to other GPUs; but in general, without the fixed-output-size optimization, the different geometry shader invocations have to be synchronized. So you may be hitting the fast cases here on one or many implementations, but you will have to test.

Consider that your vertex data might not be that large anyway: say, 8 bytes per vertex, or 32 bytes per face. You can reuse the same index buffer for all chunks (e.g. 0, 1, 2, 3, 0xffff, 4, 5, 6, 7, 0xffff, …, with 0xffff acting as the primitive restart index). This transforms the problem into a traditional time-vs-space tradeoff: you can either spend more time in the geometry shader or spend more space storing the full vertex data. Is your program running into memory limits, or into computation limits? Test it.

Note that the geometry shader can be written to be branchless. Instead of using if/else, you can use an array to store the basis vectors for the output faces in each direction.


Comments

Popular posts from this blog

python - No exponential form of the z-axis in matplotlib-3D-plots -

php - Best Light server (Linux + Web server + Database) for Raspberry Pi -

c# - "Newtonsoft.Json.JsonSerializationException unable to find constructor to use for types" error when deserializing class -