From c94510a487f3c85ce216b28a88602158aa4f3381 Mon Sep 17 00:00:00 2001 From: Mergul Date: Sat, 9 May 2020 19:41:00 +0200 Subject: [PATCH] Improved Demo and multithreading rendering: -added support for multithreaded rendering (fast) -improved shaders -added support for rendering depth -added rendering color support -improved DeptThreadPool (dynamics setting number of tryWait counts before TryWait. Low cpu usage with high responivity) -added possibility to change number of threads --- demos/assets/shaders/base.fp | 2 +- demos/assets/shaders/base.vp | 14 +- demos/external/sources/mmutils/thread_pool.d | 6 +- demos/source/app.d | 23 +- demos/source/demos/simple.d | 8 +- demos/source/demos/snake.d | 30 +- demos/source/demos/space_invaders.d | 24 +- demos/utils/source/ecs_utils/gfx/renderer.d | 309 ++++++++++++++----- 8 files changed, 311 insertions(+), 105 deletions(-) diff --git a/demos/assets/shaders/base.fp b/demos/assets/shaders/base.fp index ca38c30..d86e92e 100644 --- a/demos/assets/shaders/base.fp +++ b/demos/assets/shaders/base.fp @@ -50,6 +50,6 @@ uniform sampler2D tex; void main() { - gl_FragColor = TEX(tex,uv);// * color; + gl_FragColor = TEX(tex,uv) * color; if(gl_FragColor.a < 0.01)discard; } diff --git a/demos/assets/shaders/base.vp b/demos/assets/shaders/base.vp index d2d1af8..7be086b 100644 --- a/demos/assets/shaders/base.vp +++ b/demos/assets/shaders/base.vp @@ -70,7 +70,7 @@ precision lowp samplerCube; #endif #endif*/ - +#define VBO_BATCH 1 M_OUT vec2 uv; L_OUT vec4 color; @@ -91,12 +91,16 @@ LOC(1) ATT vec2 tex_coords; #endif void main() { + #ifdef VBO_BATCH + vec3 position = vec3(positions*4,1.0); + uv = tex_coords; + #else + vec3 position = mat3(matrix_1.x,matrix_1.y,0,matrix_1.z,matrix_1.w,0,matrix_2.xy,1) * vec3(positions,1.0); + uv = tex_coords * uv_transform.zw + uv_transform.xy; + #endif - vec3 position = mat3(matrix_1.x,matrix_1.y,0,matrix_1.z,matrix_1.w,0,matrix_2.xy,1) * vec3(positions,1.0); - position.z = depth; - uv = tex_coords * uv_transform.zw + uv_transform.xy; color = vcolor; - gl_Position = vec4(position.xy,0,1.0); + gl_Position = vec4(position.xy,depth,1.0); } diff --git a/demos/external/sources/mmutils/thread_pool.d b/demos/external/sources/mmutils/thread_pool.d index 20491ef..143960c 100644 --- a/demos/external/sources/mmutils/thread_pool.d +++ b/demos/external/sources/mmutils/thread_pool.d @@ -9,13 +9,14 @@ import bubel.ecs.atomic; //import std.stdio; import std.algorithm : map; -//version = MM_NO_LOGS; // Disable log creation +version = MM_NO_LOGS; // Disable log creation //version = MM_USE_POSIX_THREADS; // Use posix threads insted of standard library, required for betterC version (Posix)version = MM_USE_POSIX_THREADS; version (WebAssembly) { + version = MM_NO_LOGS; extern(C) struct FILE { @@ -799,6 +800,7 @@ struct ThreadPool alias FlushLogsDelegaste = void delegate(ThreadData* threadData, JobLog[] logs); /// Type of delegate to flush logs FlushLogsDelegaste onFlushLogs; /// User custom delegate to flush logs, if overriden defaultFlushLogs will be used. Can be sset after initialize() call int logsCacheNum; /// Number of log cache entries. Should be set before setThreadsNum is called + int tryWaitCount = 2000; ///Number of times which tryWait are called before timedWait call. Higher value sets better response but takes CPU time even if there are no jobs. private: ThreadData*[gMaxThreadsNum] threadsData; /// Data for threads align(64) shared int threadsNum; /// Number of threads currentlu accepting jobs @@ -1455,7 +1457,7 @@ private void threadFunc(ThreadData* threadData) while(!threadData.semaphore.tryWait()) { tryWait++; - if(tryWait>5000) + if(tryWait>threadPool.tryWaitCount) { ok = false; break; diff --git a/demos/source/app.d b/demos/source/app.d index 27c706d..af4b331 100644 --- a/demos/source/app.d +++ b/demos/source/app.d @@ -58,6 +58,7 @@ struct Launcher uint style = 3; uint entities_count; bool multithreading; + int threads; ulong timer_freq; double delta_time; uint fps; @@ -253,13 +254,12 @@ void mainLoop(void* arg) if(launcher.tool && launcher.tool_repeat != 0 && launcher.mouse.left && !igIsWindowHovered(ImGuiHoveredFlags_AnyWindow) && !igIsWindowFocused(ImGuiFocusedFlags_AnyWindow)) { float range = 500.0 / cast(float)launcher.tool_repeat; - launcher.repeat_time += launcher.delta_time*100; + launcher.repeat_time += launcher.delta_time; while(launcher.repeat_time > range) { launcher.repeat_time -= range; launcher.tool((launcher.mouse.position*launcher.scalling)-launcher.render_position, launcher.used_tool, launcher.tool_size); } - } version(WebAssembly) @@ -317,6 +317,14 @@ void mainLoop(void* arg) { launcher.multithreading = !launcher.multithreading; } + igSetNextItemWidth(0); + igLabelText("Threads:",null); + igSameLine(0,4); + if(igSliderInt("##Threads",&launcher.threads, 1, 12, null))//"Multithreading", null, launcher.multithreading, true)) + { + launcher.job_updater.pool.setThreadsNum(launcher.threads); + //launcher.threads = !launcher.multithreading; + } if(igBeginMenu("Show",true)) { if(igMenuItemBool("Statistics",null,launcher.show_stat_wnd,true)) @@ -539,11 +547,14 @@ void mainLoop(void* arg) launcher.renderer.clear(); double loop_time = launcher.getTime(); + launcher.job_updater.pool.tryWaitCount = 5000; if(launcher.loop && !launcher.loop()) { quit(); *cast(bool*)arg = false; } + launcher.job_updater.pool.tryWaitCount = 10; + loop_time = launcher.getTime() - loop_time; double draw_time = launcher.getTime(); @@ -785,7 +796,15 @@ void loadGFX() GfxConfig.materials[0].compile(); GfxConfig.materials[0].bindAttribLocation("positions",0); GfxConfig.materials[0].bindAttribLocation("tex_coords",1); + GfxConfig.materials[0].bindAttribLocation("depth",2); + GfxConfig.materials[0].bindAttribLocation("vcolor",3); GfxConfig.materials[0].link(); + + /* import std.stdio; + writeln("positions ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"positions")); + writeln("tex_coords ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"tex_coords")); + writeln("depth ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"depth")); + writeln("vcolor ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"vcolor"));*/ GfxConfig.materials[0].data.uniforms = Mallocator.makeArray!(Material.Uniform)(3); GfxConfig.materials[0].data.uniforms[0] = Material.Uniform(Material.Type.float4, GfxConfig.materials[0].getLocation("matrix_1"), 0); diff --git a/demos/source/demos/simple.d b/demos/source/demos/simple.d index 8d6b0f2..686f9be 100644 --- a/demos/source/demos/simple.d +++ b/demos/source/demos/simple.d @@ -47,23 +47,25 @@ struct CTexture struct DrawSystem { - mixin ECS.System!1; + mixin ECS.System!32; struct EntitiesData { uint length; + uint thread_id; @readonly CTexture[] textures; @readonly CLocation[] locations; } void onUpdate(EntitiesData data) { - if(launcher.renderer.item_id >= launcher.renderer.MaxObjects)return;//simple leave loop if max visible objects count was reached + if(launcher.renderer.prepared_items >= launcher.renderer.MaxObjects)return;//simple leave loop if max visible objects count was reached foreach(i; 0..data.length) { - launcher.renderer.draw(data.textures[i].tex, data.locations[i].location, vec2(16,16), vec4(0,0,1,1), 0, 0 , 0); + launcher.renderer.draw(data.textures[i].tex, data.locations[i].location, vec2(16,16), vec4(0,0,1,1), cast(ushort)(data.locations[i].y*64+data.locations[i].x), uint.max, 0, 0, 0, data.thread_id); //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1)); } + if(data.thread_id == 0)launcher.renderer.pushData(); } } diff --git a/demos/source/demos/snake.d b/demos/source/demos/snake.d index 2606fd9..94f2654 100644 --- a/demos/source/demos/snake.d +++ b/demos/source/demos/snake.d @@ -607,7 +607,7 @@ struct DrawAppleSystem { foreach(i; 0..data.location.length) { - launcher.renderer.draw(snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,32*px,16*px,16*px), 0, 0 , 0); + launcher.renderer.draw(snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,32*px,16*px,16*px), 0, uint.max, 0); } } } @@ -677,16 +677,16 @@ struct DrawSnakeSystem { final switch(cast(ubyte)part) { - case SnakePart.tail_up:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,112,16,16)*px, 0, 0, 0);break; - case SnakePart.tail_down:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,112,16,16)*px, 0, 0, 0);break; - case SnakePart.tail_left:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,112,16,16)*px, 0, 0, 0);break; - case SnakePart.tail_right:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,144,16,16)*px, 0, 0, 0);break; - case SnakePart.turn_ld:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,128,16,16)*px, 0, 0, 0);break; - case SnakePart.turn_lu:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,144,16,16)*px, 0, 0, 0);break; - case SnakePart.turn_rd:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,144,16,16)*px, 0, 0, 0);break; - case SnakePart.turn_ru:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,112,16,16)*px, 0, 0, 0);break; - case SnakePart.vertical:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,128,16,16)*px, 0, 0, 0);break; - case SnakePart.horizontal:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(48,128,16,16)*px, 0, 0, 0);break; + case SnakePart.tail_up:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,112,16,16)*px, 0, uint.max, 0);break; + case SnakePart.tail_down:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,112,16,16)*px, 0, uint.max, 0);break; + case SnakePart.tail_left:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,112,16,16)*px, 0, uint.max, 0);break; + case SnakePart.tail_right:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,144,16,16)*px, 0, uint.max, 0);break; + case SnakePart.turn_ld:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,128,16,16)*px, 0, uint.max, 0);break; + case SnakePart.turn_lu:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,144,16,16)*px, 0, uint.max, 0);break; + case SnakePart.turn_rd:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,144,16,16)*px, 0, uint.max, 0);break; + case SnakePart.turn_ru:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,112,16,16)*px, 0, uint.max, 0);break; + case SnakePart.vertical:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,128,16,16)*px, 0, uint.max, 0);break; + case SnakePart.horizontal:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(48,128,16,16)*px, 0, uint.max, 0);break; } } @@ -698,10 +698,10 @@ struct DrawSnakeSystem scope vec2 loc = cast(vec2)(data.location[i].location * 16); final switch(snake.direction) { - case CMovement.Direction.up:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,112,16,16)*px, 0, 0 , 0);break; - case CMovement.Direction.down:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,144,16,16)*px, 0, 0 , 0);break; - case CMovement.Direction.left:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,128,16,16)*px, 0, 0 , 0);break; - case CMovement.Direction.right:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(32,128,16,16)*px, 0, 0 , 0);break; + case CMovement.Direction.up:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,112,16,16)*px, 0, uint.max, 0);break; + case CMovement.Direction.down:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,144,16,16)*px, 0, uint.max, 0);break; + case CMovement.Direction.left:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,128,16,16)*px, 0, uint.max, 0);break; + case CMovement.Direction.right:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(32,128,16,16)*px, 0, uint.max, 0);break; } if(snake.parts.length >1) { diff --git a/demos/source/demos/space_invaders.d b/demos/source/demos/space_invaders.d index 6d86113..30891c8 100644 --- a/demos/source/demos/space_invaders.d +++ b/demos/source/demos/space_invaders.d @@ -183,6 +183,8 @@ struct CDepth { mixin ECS.Component; + alias depth this; + short depth; } @@ -208,23 +210,33 @@ struct EChangeDirection struct DrawSystem { - mixin ECS.System!1; + mixin ECS.System!32; struct EntitiesData { uint length; + uint thread_id; @readonly CTexture[] textures; @readonly CLocation[] locations; @readonly CScale[] scale; + @readonly @optional CDepth[] depth; } void onUpdate(EntitiesData data) { - foreach(i; 0..data.length) - { - launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, 0, 0 , 0); - //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1)); - } + if(!data.depth) + foreach(i; 0..data.length) + { + launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, cast(short)data.locations[i].y, uint.max, 0, 0, 0, data.thread_id); + //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1)); + } + else + foreach(i; 0..data.length) + { + launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, cast(short)(data.depth[i] * 64 + data.locations[i].y), uint.max, 0, 0, 0, data.thread_id); + //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1)); + } + if(data.thread_id == 0)launcher.renderer.pushData(); } } diff --git a/demos/utils/source/ecs_utils/gfx/renderer.d b/demos/utils/source/ecs_utils/gfx/renderer.d index 8735d98..ba42cf6 100644 --- a/demos/utils/source/ecs_utils/gfx/renderer.d +++ b/demos/utils/source/ecs_utils/gfx/renderer.d @@ -11,8 +11,8 @@ import ecs_utils.math.vector; import bubel.ecs.block_allocator; import bubel.ecs.vector; - -import glad.gl.gl; +version(WebAssembly)import glad.gl.gles2; +else import glad.gl.gl; version = ver1; /*version(ver5)version = vv2; @@ -57,31 +57,105 @@ struct Renderer vec2 view_pos = vec2(-1,-1); vec2 view_size = vec2(1,1); - const uint batch_size = 16_384; + enum block_size = 2^^16; + enum batch_size = block_size/68;//963;//16_384; //uint[2] time_queries; struct VertexBlock { - float[] batch_vertices; + enum max_items = batch_size;//963; + byte[] batch_vertices; ushort[] batch_indices; void* memory; - uint itmes = 0; + uint items = 0; } + Mutex* get_block_mutex; + Mutex* block_stack_mutex; + VertexBlock getBlock() { VertexBlock block; + get_block_mutex.lock(); block.memory = allocator.getBlock(); - block.batch_vertices = (cast(float*)block.memory)[0 .. 1]; + get_block_mutex.unlock(); + block.batch_vertices = (cast(byte*)block.memory)[0 .. VertexBlock.max_items * 4 * 14]; + block.batch_indices = (cast(ushort*)block.memory)[VertexBlock.max_items * 4 * 7 .. VertexBlock.max_items * (4 * 7 + 6)]; return block; } + Vector!VertexBlock blocks; + uint current_block = 0; + uint render_blocks = 0; + + void pushBlock(VertexBlock block) + { + block_stack_mutex.lock(); + prepared_items += block.items; + blocks.add(block); + render_blocks++; + block_stack_mutex.unlock(); + } + + bool isRemainingBlocks() + { + if(render_blocks <= current_block)return false; + return true; + } + + VertexBlock fetchBlock() + { + block_stack_mutex.lock(); + VertexBlock block = blocks[current_block]; + current_block++; + block_stack_mutex.unlock(); + return block; + } + + void freeBlocks() + { + block_stack_mutex.lock(); + render_blocks = 0; + current_block = 0; + foreach(VertexBlock block; blocks) + { + allocator.freeBlock(block.memory); + } + blocks.clear; + prepared_items=0; + draw_list.clear(); + block_stack_mutex.unlock(); + } + + void pushData() + { + //if(!isRemainingBlocks())return; + while(isRemainingBlocks()) + { + VertexBlock block = fetchBlock(); + uint items = block.items; + if(items + item_id >= MaxObjects)items = MaxObjects - item_id; + batch_vbo[0].bufferSubData(Buffer.BindTarget.array,items*4*14,item_id*4*14,block.batch_vertices.ptr); + batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,items*2*6,item_id*2*6,block.batch_indices.ptr); + draw_list.add(DrawCall(item_id,items)); + item_id += items; + } + } + + void pushThreadsBlocks() + { + foreach(i, ref Thread thread; threads) + { + pushBlock(thread.block); + thread.block = getBlock(); + } + } struct Thread { - - Vector!VertexBlock block; + //Vector!VertexBlock block; RenderData[] render_list; + VertexBlock block; } Thread[] threads; @@ -102,7 +176,7 @@ struct Renderer Buffer[2] batch_vbo; Buffer[2] batch_ibo; - float[] batch_vertices; + ubyte[] batch_vertices; ushort[] batch_indices; Buffer indirect_buffer; @@ -121,8 +195,17 @@ struct Renderer uint mesh_id; } + struct DrawCall + { + uint start; + uint count; + } + + Vector!DrawCall draw_list; + RenderData[] render_list; uint item_id; + uint prepared_items; uint[] multi_count; uint[] multi_offset; @@ -140,6 +223,18 @@ struct Renderer { //this.technique = __ecs_used_technique; __initialize(this); + + get_block_mutex = Mallocator.make!Mutex(); + block_stack_mutex = Mallocator.make!Mutex(); + get_block_mutex.initialize(); + block_stack_mutex.initialize(); + + + threads = Mallocator.makeArray!Thread(12); + foreach(ref Thread thread;threads) + { + thread.block = getBlock(); + } } private static void __initialize_gl(ref Renderer this_) @@ -172,16 +267,16 @@ struct Renderer case Technique.vbo_batch: batch_vbo[0].create(); batch_ibo[0].create(); - batch_vbo[0].bufferData(Buffer.BindTarget.array,16,4*MaxObjects,BufferUsage,null); + batch_vbo[0].bufferData(Buffer.BindTarget.array,14,4*MaxObjects,BufferUsage,null); batch_ibo[0].bufferData(Buffer.BindTarget.element_array,2,6*MaxObjects,BufferUsage,null); batch_vbo[1].create(); batch_ibo[1].create(); - batch_vbo[1].bufferData(Buffer.BindTarget.array,16,4*MaxObjects,BufferUsage,null); + batch_vbo[1].bufferData(Buffer.BindTarget.array,14,4*MaxObjects,BufferUsage,null); batch_ibo[1].bufferData(Buffer.BindTarget.element_array,2,6*MaxObjects,BufferUsage,null); - batch_vertices = Mallocator.makeArray!float(16*MaxObjects); - batch_indices = Mallocator.makeArray!ushort(6*MaxObjects); + //batch_vertices = Mallocator.makeArray!ubyte(14*4*MaxObjects); + //batch_indices = Mallocator.makeArray!ushort(6*MaxObjects); break; case Technique.instanced_attrib_divisor: goto case(Technique.uniform_buffer_indexed); @@ -285,7 +380,7 @@ struct Renderer SDL_Log("Uniform block max size: %u",block_max_size); SDL_Log("Data offset: %u",data_offset); - allocator = BlockAllocator(1245184, 32); + allocator = BlockAllocator(block_size, 32); } } @@ -296,7 +391,7 @@ struct Renderer void draw(Texture tex, vec2 pos, vec2 size, vec4 coords, short depth = 0, uint color = uint.max, float angle = 0, uint material_id = 0, uint mesh_id = 0, uint thread_id = 0) { - if(item_id >= MaxObjects)return; + if(prepared_items >= MaxObjects)return; __draw(this,tex,pos,size,coords,depth,color,angle,material_id,mesh_id,thread_id); } @@ -364,12 +459,14 @@ struct Renderer data_index += data_offset; item_id++; + prepared_items++; } } private static void __draw_gl_vbo_batch(ref Renderer this_, Texture tex, vec2 pos, vec2 size, vec4 coords, short depth, uint color, float angle, uint material_id, uint mesh_id, uint thread_id = 0) { import ecs_utils.gfx.config; + short[3] mem = [depth, *cast(short*)&color, *(cast(short*)&color + 1)]; //import core.stdc.string; with(this_) { @@ -389,16 +486,37 @@ struct Renderer memcpy(ptr+16,pos.data.ptr,8); memcpy(ptr+32,coords.data.ptr,16);*/ + short[] verts = cast(short[])threads[thread_id].block.batch_vertices; + uint item_id = threads[thread_id].block.items; + if(angle == 0) { - batch_vertices[item_id*16] = GfxConfig.meshes[mesh_id].vertices[0] * size.x + pos.x; - batch_vertices[item_id*16+1] = GfxConfig.meshes[mesh_id].vertices[1] * size.y + pos.y; - batch_vertices[item_id*16+4] = GfxConfig.meshes[mesh_id].vertices[4] * size.x + pos.x; - batch_vertices[item_id*16+5] = GfxConfig.meshes[mesh_id].vertices[5] * size.y + pos.y; - batch_vertices[item_id*16+8] = GfxConfig.meshes[mesh_id].vertices[8] * size.x + pos.x; - batch_vertices[item_id*16+9] = GfxConfig.meshes[mesh_id].vertices[9] * size.y + pos.y; - batch_vertices[item_id*16+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x + pos.x; - batch_vertices[item_id*16+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y + pos.y; + verts[item_id*28] = cast(short)((GfxConfig.meshes[mesh_id].vertices[0] * size.x + pos.x) * 8191); + verts[item_id*28+1] = cast(short)((GfxConfig.meshes[mesh_id].vertices[1] * size.y + pos.y) * 8191); + verts[item_id*28+2] = cast(short)((GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x)*32767); + verts[item_id*28+3] = cast(short)((GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y)*32767); + memcpy(verts.ptr+item_id*28+4,mem.ptr,6); + + + verts[item_id*28+7] = cast(short)((GfxConfig.meshes[mesh_id].vertices[4] * size.x + pos.x) * 8191); + verts[item_id*28+8] = cast(short)((GfxConfig.meshes[mesh_id].vertices[5] * size.y + pos.y) * 8191); + verts[item_id*28+9] = cast(short)((GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x)*32767); + verts[item_id*28+10] = cast(short)((GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y)*32767); + memcpy(verts.ptr+item_id*28+11,mem.ptr,6); + + + verts[item_id*28+14] = cast(short)((GfxConfig.meshes[mesh_id].vertices[8] * size.x + pos.x) * 8191); + verts[item_id*28+15] = cast(short)((GfxConfig.meshes[mesh_id].vertices[9] * size.y + pos.y) * 8191); + verts[item_id*28+16] = cast(short)((GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x)*32767); + verts[item_id*28+17] = cast(short)((GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y)*32767); + memcpy(verts.ptr+item_id*28+18,mem.ptr,6); + + + verts[item_id*28+21] = cast(short)((GfxConfig.meshes[mesh_id].vertices[12] * size.x + pos.x) * 8191); + verts[item_id*28+22] = cast(short)((GfxConfig.meshes[mesh_id].vertices[13] * size.y + pos.y) * 8191); + verts[item_id*28+23] = cast(short)((GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x)*32767); + verts[item_id*28+24] = cast(short)((GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y)*32767); + memcpy(verts.ptr+item_id*28+25,mem.ptr,6); } else { @@ -406,50 +524,72 @@ struct Renderer float sinn = sinf(angle); float coss = cosf(angle); - /*batch_vertices[item_id*16] = GfxConfig.meshes[mesh_id].vertices[0] * size.x; - batch_vertices[item_id*16+1] = GfxConfig.meshes[mesh_id].vertices[1] * size.y; - batch_vertices[item_id*16+4] = GfxConfig.meshes[mesh_id].vertices[4] * size.x; - batch_vertices[item_id*16+5] = GfxConfig.meshes[mesh_id].vertices[5] * size.y; - batch_vertices[item_id*16+8] = GfxConfig.meshes[mesh_id].vertices[8] * size.x; - batch_vertices[item_id*16+9] = GfxConfig.meshes[mesh_id].vertices[9] * size.y; - batch_vertices[item_id*16+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x; - batch_vertices[item_id*16+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y;*/ + /*batch_vertices[item_id*28] = GfxConfig.meshes[mesh_id].vertices[0] * size.x; + batch_vertices[item_id*28+1] = GfxConfig.meshes[mesh_id].vertices[1] * size.y; + batch_vertices[item_id*28+4] = GfxConfig.meshes[mesh_id].vertices[4] * size.x; + batch_vertices[item_id*28+5] = GfxConfig.meshes[mesh_id].vertices[5] * size.y; + batch_vertices[item_id*28+8] = GfxConfig.meshes[mesh_id].vertices[8] * size.x; + batch_vertices[item_id*28+9] = GfxConfig.meshes[mesh_id].vertices[9] * size.y; + batch_vertices[item_id*28+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x; + batch_vertices[item_id*28+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y;*/ - batch_vertices[item_id*16] = (GfxConfig.meshes[mesh_id].vertices[0] * coss + GfxConfig.meshes[mesh_id].vertices[1] * sinn) * size.x + pos.x; - batch_vertices[item_id*16+1] = (GfxConfig.meshes[mesh_id].vertices[1] * coss - GfxConfig.meshes[mesh_id].vertices[0] * sinn) * size.y + pos.y; - batch_vertices[item_id*16+4] = (GfxConfig.meshes[mesh_id].vertices[4] * coss + GfxConfig.meshes[mesh_id].vertices[5] * sinn) * size.x + pos.x; - batch_vertices[item_id*16+5] = (GfxConfig.meshes[mesh_id].vertices[5] * coss - GfxConfig.meshes[mesh_id].vertices[4] * sinn) * size.y + pos.y; - batch_vertices[item_id*16+8] = (GfxConfig.meshes[mesh_id].vertices[8] * coss + GfxConfig.meshes[mesh_id].vertices[9] * sinn) * size.x + pos.x; - batch_vertices[item_id*16+9] = (GfxConfig.meshes[mesh_id].vertices[9] * coss - GfxConfig.meshes[mesh_id].vertices[8] * sinn) * size.y + pos.y; - batch_vertices[item_id*16+12] = (GfxConfig.meshes[mesh_id].vertices[12] * coss + GfxConfig.meshes[mesh_id].vertices[13] * sinn) * size.x + pos.x; - batch_vertices[item_id*16+13] = (GfxConfig.meshes[mesh_id].vertices[13] * coss - GfxConfig.meshes[mesh_id].vertices[12] * sinn) * size.y + pos.y; + verts[item_id*28] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[0] * coss + GfxConfig.meshes[mesh_id].vertices[1] * sinn) * size.x + pos.x) * 8191); + verts[item_id*28+1] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[1] * coss - GfxConfig.meshes[mesh_id].vertices[0] * sinn) * size.y + pos.y) * 8191); + verts[item_id*28+7] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[4] * coss + GfxConfig.meshes[mesh_id].vertices[5] * sinn) * size.x + pos.x) * 8191); + verts[item_id*28+8] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[5] * coss - GfxConfig.meshes[mesh_id].vertices[4] * sinn) * size.y + pos.y) * 8191); + verts[item_id*28+14] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[8] * coss + GfxConfig.meshes[mesh_id].vertices[9] * sinn) * size.x + pos.x) * 8191); + verts[item_id*28+15] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[9] * coss - GfxConfig.meshes[mesh_id].vertices[8] * sinn) * size.y + pos.y) * 8191); + verts[item_id*28+21] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[12] * coss + GfxConfig.meshes[mesh_id].vertices[13] * sinn) * size.x + pos.x) * 8191); + verts[item_id*28+22] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[13] * coss - GfxConfig.meshes[mesh_id].vertices[12] * sinn) * size.y + pos.y) * 8191); } - batch_vertices[item_id*16+2] = GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x; - batch_vertices[item_id*16+3] = GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y; - batch_vertices[item_id*16+6] = GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x; - batch_vertices[item_id*16+7] = GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y; - batch_vertices[item_id*16+10] = GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x; - batch_vertices[item_id*16+11] = GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y; - batch_vertices[item_id*16+14] = GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x; - batch_vertices[item_id*16+15] = GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y; + /*verts[item_id*28+2] = cast(short)((GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x)*32767); + verts[item_id*28+3] = cast(short)((GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y)*32767); + verts[item_id*28+9] = cast(short)((GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x)*32767); + verts[item_id*28+10] = cast(short)((GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y)*32767); + verts[item_id*28+16] = cast(short)((GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x)*32767); + verts[item_id*28+17] = cast(short)((GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y)*32767); + verts[item_id*28+23] = cast(short)((GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x)*32767); + verts[item_id*28+24] = cast(short)((GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y)*32767);*/ - uint ind_id = item_id % batch_size; + /*verts[item_id*28+4] = depth; + verts[item_id*28+11] = depth; + verts[item_id*28+18] = depth; + verts[item_id*28+25] = depth; - batch_indices[item_id*6] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[0] + ind_id*4); - batch_indices[item_id*6+1] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[1] + ind_id*4); - batch_indices[item_id*6+2] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[2] + ind_id*4); - batch_indices[item_id*6+3] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[3] + ind_id*4); - batch_indices[item_id*6+4] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[4] + ind_id*4); - batch_indices[item_id*6+5] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[5] + ind_id*4); + *cast(uint*)&verts[item_id*28+5] = color; + *cast(uint*)&verts[item_id*28+12] = color; + *cast(uint*)&verts[item_id*28+19] = color; + *cast(uint*)&verts[item_id*28+26] = color; + + memcpy(verts.ptr+item_id*28+4,mem.ptr,6); + memcpy(verts.ptr+item_id*28+11,mem.ptr,6); + memcpy(verts.ptr+item_id*28+18,mem.ptr,6); + memcpy(verts.ptr+item_id*28+25,mem.ptr,6);*/ + + uint ind_id = (item_id % batch_size)*4; + + ushort[] indices = threads[thread_id].block.batch_indices; + + indices[item_id*6] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[0] + ind_id); + indices[item_id*6+1] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[1] + ind_id); + indices[item_id*6+2] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[2] + ind_id); + indices[item_id*6+3] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[3] + ind_id); + indices[item_id*6+4] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[4] + ind_id); + indices[item_id*6+5] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[5] + ind_id); //render_list[item_id] = RenderData(tex,material_id,mesh_id); - render_list[item_id].texture = tex; - render_list[item_id].material_id = material_id; - render_list[item_id].mesh_id = mesh_id; + //render_list[item_id].texture = tex; + //render_list[item_id].material_id = material_id; + //render_list[item_id].mesh_id = mesh_id; //data_index += 1;//data_offset; - item_id++; + threads[thread_id].block.items++; + if(threads[thread_id].block.items >= VertexBlock.max_items) + { + pushBlock(threads[thread_id].block); + threads[thread_id].block = getBlock(); + } } } @@ -467,9 +607,22 @@ struct Renderer { glClearColor(0,0,0,0); glViewport(0,0,this_.resolution.x,this_.resolution.y); - glClear(GL_COLOR_BUFFER_BIT);// | GL_DEPTH_BUFFER_BIT); - glDisable(GL_DEPTH_TEST); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + //glDisable(GL_DEPTH_TEST); + glEnable(GL_DEPTH_TEST); glDisable(GL_CULL_FACE); + glDepthFunc(GL_LESS); + + version(WebAssembly) + { + glDepthRangef(0,1); + } + else + { + glDepthRange(0,1); + } + //glDepthRange(0,1); + //glClearDepth(1); } void present() @@ -484,6 +637,10 @@ struct Renderer private static void __present_gl(ref Renderer this_) { + + this_.pushThreadsBlocks(); + this_.pushData(); + glViewport(0,0,this_.resolution.x,this_.resolution.y); //glEnable(GL_ALPHA_TEST); //glAlphaFunc(GL_GREATER, 0.01); @@ -505,14 +662,17 @@ struct Renderer break; case Technique.vbo_batch: //if(data_index){ - batch_vbo[0].bufferSubData(Buffer.BindTarget.array,item_id*4*16,0,batch_vertices.ptr); - batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,item_id*6*2,0,batch_indices.ptr); + //batch_vbo[0].bufferSubData(Buffer.BindTarget.array,item_id*4*14,0,batch_vertices.ptr); + //batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,item_id*6*2,0,batch_indices.ptr); batch_vbo[0].bind(Buffer.BindTarget.array); batch_ibo[0].bind(Buffer.BindTarget.element_array); - glVertexAttribPointer(0,2,GL_FLOAT,false,16,null); - glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)8);//} + //glVertexAttribPointer(0,2,GL_SHORT,true,14,null); + //glVertexAttribPointer(1,2,GL_SHORT,true,14,cast(void*)4);//} + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + //glVertexAttribPointer(2,1,GL_SHORT,true,14,cast(void*)6);//} break; case Technique.instanced_attrib_divisor: ubos[0].bufferSubData(Buffer.BindTarget.uniform,data_index,0,uniform_block.ptr); @@ -609,8 +769,8 @@ struct Renderer //glBeginQuery(GL_TIME_ELAPSED, time_queries[0]); if(technique == Technique.vbo_batch) { - uint items = item_id/batch_size+1; - foreach(i; 0..items) + //uint items = item_id/batch_size+1; + foreach(i; 0..draw_list.length) { if(material_id != render_list[i].material_id) { @@ -625,16 +785,20 @@ struct Renderer render_list[i].texture.bind(); } - uint instance_count = batch_size; + /*uint instance_count = batch_size; if((i+1)*batch_size > item_id) { instance_count = item_id%batch_size; - } + }*/ - glVertexAttribPointer(0,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16)); - glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16+8)); + // glVertexAttribPointer(0,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16)); + // glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16+8)); + glVertexAttribPointer(0,2,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14)); + glVertexAttribPointer(1,2,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14+4)); + glVertexAttribPointer(2,1,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14+8)); + glVertexAttribPointer(3,4,GL_UNSIGNED_BYTE,true,14,cast(void*)(draw_list[i].start*4*14+10)); - glDrawElements(GL_TRIANGLES,instance_count*6,GL_UNSIGNED_SHORT,cast(void*)(i*batch_size*6*2)); + glDrawElements(GL_TRIANGLES,draw_list[i].count*6,GL_UNSIGNED_SHORT,cast(void*)(draw_list[i].start*6*2)); //glDrawElementsBaseVertex(GL_TRIANGLES,instance_count*6,GL_UNSIGNED_SHORT,cast(void*)(i*16_384*6*2),i*16_384*4); } @@ -817,6 +981,9 @@ struct Renderer } glDisableVertexAttribArray(0); glDisableVertexAttribArray(1); + glDisableVertexAttribArray(2); + glDisableVertexAttribArray(3); + this_.freeBlocks(); /*glUseProgram(0); glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);*/