From c94510a487f3c85ce216b28a88602158aa4f3381 Mon Sep 17 00:00:00 2001
From: Mergul <wismerchil@interia.eu>
Date: Sat, 9 May 2020 19:41:00 +0200
Subject: [PATCH] Improved demo and multithreaded rendering: -added support
 for multithreaded rendering (fast) -improved shaders -added support for
 rendering depth -added rendering color support -improved ThreadPool
 (dynamic setting of the number of tryWait calls before timedWait; low CPU
 usage with high responsiveness) -added possibility to change number of threads

---
 demos/assets/shaders/base.fp                 |   2 +-
 demos/assets/shaders/base.vp                 |  14 +-
 demos/external/sources/mmutils/thread_pool.d |   6 +-
 demos/source/app.d                           |  23 +-
 demos/source/demos/simple.d                  |   8 +-
 demos/source/demos/snake.d                   |  30 +-
 demos/source/demos/space_invaders.d          |  24 +-
 demos/utils/source/ecs_utils/gfx/renderer.d  | 309 ++++++++++++++-----
 8 files changed, 311 insertions(+), 105 deletions(-)

diff --git a/demos/assets/shaders/base.fp b/demos/assets/shaders/base.fp
index ca38c30..d86e92e 100644
--- a/demos/assets/shaders/base.fp
+++ b/demos/assets/shaders/base.fp
@@ -50,6 +50,6 @@ uniform sampler2D tex;
 
 void main() 
 {
-    gl_FragColor  = TEX(tex,uv);// * color;
+    gl_FragColor  = TEX(tex,uv) * color;
 	if(gl_FragColor.a < 0.01)discard;
 }
diff --git a/demos/assets/shaders/base.vp b/demos/assets/shaders/base.vp
index d2d1af8..7be086b 100644
--- a/demos/assets/shaders/base.vp
+++ b/demos/assets/shaders/base.vp
@@ -70,7 +70,7 @@ precision lowp samplerCube;
 	#endif
 #endif*/
 
-
+#define VBO_BATCH 1
 
 M_OUT vec2 uv;
 L_OUT vec4 color;
@@ -91,12 +91,16 @@ LOC(1) ATT vec2 tex_coords;
 #endif
 
 void main() {
+	#ifdef VBO_BATCH
+        vec3 position = vec3(positions*4.0,1.0); // batched path: float literal, GLSL ES has no implicit int->float conversion
+        uv = tex_coords; // batched path: texture coordinates are used as supplied
+    #else
+        vec3 position = mat3(matrix_1.x,matrix_1.y,0,matrix_1.z,matrix_1.w,0,matrix_2.xy,1) * vec3(positions,1.0);
+        uv = tex_coords * uv_transform.zw + uv_transform.xy;
+    #endif
 	
-	vec3 position = mat3(matrix_1.x,matrix_1.y,0,matrix_1.z,matrix_1.w,0,matrix_2.xy,1) * vec3(positions,1.0);
-    position.z = depth;
-	uv = tex_coords * uv_transform.zw + uv_transform.xy;
 	color = vcolor;
     
-	gl_Position = vec4(position.xy,0,1.0);
+	gl_Position = vec4(position.xy,depth,1.0);
 	
 }
diff --git a/demos/external/sources/mmutils/thread_pool.d b/demos/external/sources/mmutils/thread_pool.d
index 20491ef..143960c 100644
--- a/demos/external/sources/mmutils/thread_pool.d
+++ b/demos/external/sources/mmutils/thread_pool.d
@@ -9,13 +9,14 @@ import bubel.ecs.atomic;
 //import std.stdio;
 import std.algorithm : map;
 
-//version = MM_NO_LOGS; // Disable log creation
+version = MM_NO_LOGS; // Disable log creation
 //version = MM_USE_POSIX_THREADS; // Use posix threads insted of standard library, required for betterC
 
 version (Posix)version = MM_USE_POSIX_THREADS;
 
 version (WebAssembly)
 {
+	version = MM_NO_LOGS;
 	extern(C) struct FILE
 	{
 		
@@ -799,6 +800,7 @@ struct ThreadPool
 	alias FlushLogsDelegaste = void delegate(ThreadData* threadData, JobLog[] logs); /// Type of delegate to flush logs
 	FlushLogsDelegaste onFlushLogs; /// User custom delegate to flush logs, if overriden defaultFlushLogs will be used. Can be sset after initialize() call
 	int logsCacheNum; /// Number of log cache entries. Should be set before setThreadsNum is called
+	int tryWaitCount = 2000; /// Number of tryWait() attempts made before falling back to timedWait(). Higher values improve responsiveness but consume CPU time even when there are no jobs.
 private:
 	ThreadData*[gMaxThreadsNum] threadsData; /// Data for threads
 	align(64) shared int threadsNum; /// Number of threads currentlu accepting jobs
@@ -1455,7 +1457,7 @@ private void threadFunc(ThreadData* threadData)
 				while(!threadData.semaphore.tryWait())
 				{
 					tryWait++;
-					if(tryWait>5000)
+					if(tryWait>threadPool.tryWaitCount)
 					{
 						ok = false;
 						break;
diff --git a/demos/source/app.d b/demos/source/app.d
index 27c706d..af4b331 100644
--- a/demos/source/app.d
+++ b/demos/source/app.d
@@ -58,6 +58,7 @@ struct Launcher
     uint style = 3;
     uint entities_count;
     bool multithreading;
+    int threads;
     ulong timer_freq;
     double delta_time;
     uint fps;
@@ -253,13 +254,12 @@ void mainLoop(void* arg)
     if(launcher.tool && launcher.tool_repeat != 0 && launcher.mouse.left && !igIsWindowHovered(ImGuiHoveredFlags_AnyWindow) && !igIsWindowFocused(ImGuiFocusedFlags_AnyWindow))
     {
         float range = 500.0 / cast(float)launcher.tool_repeat;
-        launcher.repeat_time += launcher.delta_time*100;
+        launcher.repeat_time += launcher.delta_time;
         while(launcher.repeat_time > range)
         {
             launcher.repeat_time -= range;
             launcher.tool((launcher.mouse.position*launcher.scalling)-launcher.render_position, launcher.used_tool, launcher.tool_size);
         }
-        
     }
 
     version(WebAssembly)
@@ -317,6 +317,14 @@ void mainLoop(void* arg)
             {
                 launcher.multithreading = !launcher.multithreading;
             }
+            igSetNextItemWidth(0);
+            igLabelText("Threads:",""); // fmt is a printf-style format string and must not be null
+            igSameLine(0,4);
+            if(igSliderInt("##Threads",&launcher.threads, 1, 12, null)) // slider range is 1..12 threads
+            {
+                // apply the value selected in the slider to the job pool
+                launcher.job_updater.pool.setThreadsNum(launcher.threads);
+            }
             if(igBeginMenu("Show",true))
             {
                 if(igMenuItemBool("Statistics",null,launcher.show_stat_wnd,true))
@@ -539,11 +547,14 @@ void mainLoop(void* arg)
     launcher.renderer.clear();
 
     double loop_time = launcher.getTime();
+    launcher.job_updater.pool.tryWaitCount = 5000;
     if(launcher.loop && !launcher.loop())
     {   
         quit();
         *cast(bool*)arg = false;
     }
+    launcher.job_updater.pool.tryWaitCount = 10;
+
     loop_time = launcher.getTime() - loop_time;
 
     double draw_time = launcher.getTime();
@@ -785,7 +796,15 @@ void loadGFX()
     GfxConfig.materials[0].compile();
     GfxConfig.materials[0].bindAttribLocation("positions",0);
     GfxConfig.materials[0].bindAttribLocation("tex_coords",1);
+    GfxConfig.materials[0].bindAttribLocation("depth",2);
+    GfxConfig.materials[0].bindAttribLocation("vcolor",3);
     GfxConfig.materials[0].link();
+
+   /* import std.stdio;
+    writeln("positions ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"positions"));
+    writeln("tex_coords ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"tex_coords"));
+    writeln("depth ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"depth"));
+    writeln("vcolor ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"vcolor"));*/
     
     GfxConfig.materials[0].data.uniforms = Mallocator.makeArray!(Material.Uniform)(3);
     GfxConfig.materials[0].data.uniforms[0] = Material.Uniform(Material.Type.float4, GfxConfig.materials[0].getLocation("matrix_1"), 0);
diff --git a/demos/source/demos/simple.d b/demos/source/demos/simple.d
index 8d6b0f2..686f9be 100644
--- a/demos/source/demos/simple.d
+++ b/demos/source/demos/simple.d
@@ -47,23 +47,25 @@ struct CTexture
 
 struct DrawSystem
 {
-    mixin ECS.System!1;
+    mixin ECS.System!32;
 
     struct EntitiesData
     {
         uint length;
+        uint thread_id;
         @readonly CTexture[] textures;
         @readonly CLocation[] locations;
     }
 
     void onUpdate(EntitiesData data)
     {
-        if(launcher.renderer.item_id >= launcher.renderer.MaxObjects)return;//simple leave loop if max visible objects count was reached
+        if(launcher.renderer.prepared_items >= launcher.renderer.MaxObjects)return;//simple leave loop if max visible objects count was reached
         foreach(i; 0..data.length)
         {
-            launcher.renderer.draw(data.textures[i].tex, data.locations[i].location, vec2(16,16), vec4(0,0,1,1), 0, 0 , 0);
+            launcher.renderer.draw(data.textures[i].tex, data.locations[i].location, vec2(16,16), vec4(0,0,1,1), cast(ushort)(data.locations[i].y*64+data.locations[i].x), uint.max, 0, 0, 0, data.thread_id);
             //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1));
         }
+        if(data.thread_id == 0)launcher.renderer.pushData();
     }
 }
 
diff --git a/demos/source/demos/snake.d b/demos/source/demos/snake.d
index 2606fd9..94f2654 100644
--- a/demos/source/demos/snake.d
+++ b/demos/source/demos/snake.d
@@ -607,7 +607,7 @@ struct DrawAppleSystem
     {
         foreach(i; 0..data.location.length)
         {
-            launcher.renderer.draw(snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,32*px,16*px,16*px), 0, 0 , 0);
+            launcher.renderer.draw(snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,32*px,16*px,16*px), 0, uint.max, 0);
         }
     }
 }
@@ -677,16 +677,16 @@ struct DrawSnakeSystem
     {
         final switch(cast(ubyte)part)
         {
-            case SnakePart.tail_up:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,112,16,16)*px, 0, 0, 0);break;
-            case SnakePart.tail_down:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,112,16,16)*px, 0, 0, 0);break;
-            case SnakePart.tail_left:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,112,16,16)*px, 0, 0, 0);break;
-            case SnakePart.tail_right:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,144,16,16)*px, 0, 0, 0);break;
-            case SnakePart.turn_ld:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,128,16,16)*px, 0, 0, 0);break;
-            case SnakePart.turn_lu:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,144,16,16)*px, 0, 0, 0);break;
-            case SnakePart.turn_rd:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,144,16,16)*px, 0, 0, 0);break;
-            case SnakePart.turn_ru:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,112,16,16)*px, 0, 0, 0);break;
-            case SnakePart.vertical:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,128,16,16)*px, 0, 0, 0);break;
-            case SnakePart.horizontal:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(48,128,16,16)*px, 0, 0, 0);break;
+            case SnakePart.tail_up:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,112,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.tail_down:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,112,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.tail_left:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,112,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.tail_right:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,144,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.turn_ld:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,128,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.turn_lu:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,144,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.turn_rd:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,144,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.turn_ru:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,112,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.vertical:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,128,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.horizontal:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(48,128,16,16)*px, 0, uint.max, 0);break;
         }
     }
 
@@ -698,10 +698,10 @@ struct DrawSnakeSystem
             scope vec2 loc = cast(vec2)(data.location[i].location * 16);
             final switch(snake.direction)
             {
-                case CMovement.Direction.up:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,112,16,16)*px, 0, 0 , 0);break;
-                case CMovement.Direction.down:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,144,16,16)*px, 0, 0 , 0);break;
-                case CMovement.Direction.left:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,128,16,16)*px, 0, 0 , 0);break;
-                case CMovement.Direction.right:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(32,128,16,16)*px, 0, 0 , 0);break;
+                case CMovement.Direction.up:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,112,16,16)*px, 0, uint.max, 0);break;
+                case CMovement.Direction.down:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,144,16,16)*px, 0, uint.max, 0);break;
+                case CMovement.Direction.left:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,128,16,16)*px, 0, uint.max, 0);break;
+                case CMovement.Direction.right:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(32,128,16,16)*px, 0, uint.max, 0);break;
             }
             if(snake.parts.length >1)
             {
diff --git a/demos/source/demos/space_invaders.d b/demos/source/demos/space_invaders.d
index 6d86113..30891c8 100644
--- a/demos/source/demos/space_invaders.d
+++ b/demos/source/demos/space_invaders.d
@@ -183,6 +183,8 @@ struct CDepth
 {
     mixin ECS.Component;
 
+    alias depth this;
+
     short depth;
 }
 
@@ -208,23 +210,33 @@ struct EChangeDirection
 
 struct DrawSystem
 {
-    mixin ECS.System!1;
+    mixin ECS.System!32;
 
     struct EntitiesData
     {
         uint length;
+        uint thread_id;
         @readonly CTexture[] textures;
         @readonly CLocation[] locations;
         @readonly CScale[] scale;
+        @readonly @optional CDepth[] depth;
     }
 
     void onUpdate(EntitiesData data)
     {
-        foreach(i; 0..data.length)
-        {
-            launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, 0, 0 , 0);
-            //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1));
-        }
+        if(!data.depth)
+            foreach(i; 0..data.length)
+            {
+                launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, cast(short)data.locations[i].y, uint.max, 0, 0, 0, data.thread_id);
+                //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1));
+            }
+        else
+            foreach(i; 0..data.length)
+            {
+                launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, cast(short)(data.depth[i] * 64 + data.locations[i].y), uint.max, 0, 0, 0, data.thread_id);
+                //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1));
+            }
+        if(data.thread_id == 0)launcher.renderer.pushData();
     }
 }
 
diff --git a/demos/utils/source/ecs_utils/gfx/renderer.d b/demos/utils/source/ecs_utils/gfx/renderer.d
index 8735d98..ba42cf6 100644
--- a/demos/utils/source/ecs_utils/gfx/renderer.d
+++ b/demos/utils/source/ecs_utils/gfx/renderer.d
@@ -11,8 +11,8 @@ import ecs_utils.math.vector;
 
 import bubel.ecs.block_allocator;
 import bubel.ecs.vector;
-
-import glad.gl.gl;
+version(WebAssembly)import glad.gl.gles2;
+else import glad.gl.gl;
 
 version = ver1;
 /*version(ver5)version = vv2;
@@ -57,31 +57,105 @@ struct Renderer
     vec2 view_pos = vec2(-1,-1);
     vec2 view_size = vec2(1,1);
 
-    const uint batch_size = 16_384;
+    enum block_size = 2^^16;
+    enum batch_size  = block_size/68;//963;//16_384;
     //uint[2] time_queries;
 
     struct VertexBlock
     {
-        float[] batch_vertices;
+        enum max_items = batch_size;//963;
+        byte[] batch_vertices;
         ushort[] batch_indices;
         void* memory;
-        uint itmes = 0;
+        uint items = 0;
     }
 
+    Mutex* get_block_mutex;
+    Mutex* block_stack_mutex;
+
     VertexBlock getBlock()
     {
         VertexBlock block;
+        get_block_mutex.lock(); // guard the shared allocator while taking a block
         block.memory = allocator.getBlock();
-        block.batch_vertices = (cast(float*)block.memory)[0 .. 1];
+        get_block_mutex.unlock();
+        block.batch_vertices = (cast(byte*)block.memory)[0 .. VertexBlock.max_items * 4 * 14]; // vertex region: 4 vertices of 14 bytes per item
+        block.batch_indices = (cast(ushort*)block.memory)[VertexBlock.max_items * 4 * 7 .. VertexBlock.max_items * (4 * 7 + 6)]; // index region starts where the vertex bytes end: 6 ushorts per item
         return block;
     }
 
+    Vector!VertexBlock blocks;
+    uint current_block = 0;
+    uint render_blocks = 0;
+
+    void pushBlock(VertexBlock block) // appends a block to the shared list consumed by fetchBlock()
+    {
+        block_stack_mutex.lock();
+        prepared_items += block.items; // running total that draw() compares against MaxObjects
+        blocks.add(block);
+        render_blocks++;
+        block_stack_mutex.unlock();
+    }
+
+    bool isRemainingBlocks()
+    {
+        // True while at least one pushed block has not been fetched yet.
+        return render_blocks > current_block;
+    }
+
+    VertexBlock fetchBlock() // returns the block at current_block and advances the cursor
+    {
+        block_stack_mutex.lock();
+        VertexBlock block = blocks[current_block]; // caller is expected to test isRemainingBlocks() first, as pushData() does
+        current_block++;
+        block_stack_mutex.unlock();
+        return block;
+    }
+
+    void freeBlocks() // returns every pushed block to the allocator and resets the block/draw bookkeeping
+    {
+        block_stack_mutex.lock();
+        render_blocks = 0;
+        current_block = 0;
+        foreach(VertexBlock block; blocks)
+        {
+            allocator.freeBlock(block.memory); // NOTE(review): getBlock() guards the allocator with get_block_mutex, not this mutex - confirm the two never run concurrently
+        }
+        blocks.clear;
+        prepared_items=0; // lets draw() accept items again
+        draw_list.clear();
+        block_stack_mutex.unlock();
+    }
+
+    void pushData()
+    {
+        // Upload every block pushed so far and record one DrawCall per block.
+        while(isRemainingBlocks())
+        {
+            VertexBlock block = fetchBlock();
+            uint items = block.items;
+            if(items + item_id >= MaxObjects)items = MaxObjects - item_id; // clamp so item_id never exceeds MaxObjects
+            batch_vbo[0].bufferSubData(Buffer.BindTarget.array,items*4*14,item_id*4*14,block.batch_vertices.ptr); // 4 vertices * 14 bytes per item
+            batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,items*2*6,item_id*2*6,block.batch_indices.ptr); // 6 ushort indices per item
+            draw_list.add(DrawCall(item_id,items));
+            item_id += items;
+        }
+    }
+
+    void pushThreadsBlocks()
+    {
+        foreach(ref Thread worker; threads) // hand over every thread's current block
+        {
+            pushBlock(worker.block);
+            worker.block = getBlock(); // the thread continues with a new block
+        }
+    }
 
     struct Thread
     {
-
-        Vector!VertexBlock block;
+        //Vector!VertexBlock block;
         RenderData[] render_list;
+        VertexBlock block;
     }
     Thread[] threads;
 
@@ -102,7 +176,7 @@ struct Renderer
     Buffer[2] batch_vbo;
     Buffer[2] batch_ibo;
 
-    float[] batch_vertices;
+    ubyte[] batch_vertices;
     ushort[] batch_indices;
 
     Buffer indirect_buffer;
@@ -121,8 +195,17 @@ struct Renderer
         uint mesh_id;
     }
 
+    struct DrawCall
+    {
+        uint start; // index of the first item (pushData() passes item_id)
+        uint count; // number of items (pushData() passes the uploaded item count)
+    }
+
+    Vector!DrawCall draw_list;
+
     RenderData[] render_list;
     uint item_id;
+    uint prepared_items;
 
     uint[] multi_count;
     uint[] multi_offset;
@@ -140,6 +223,18 @@ struct Renderer
     {
         //this.technique = __ecs_used_technique;
         __initialize(this);
+
+        get_block_mutex = Mallocator.make!Mutex();
+        block_stack_mutex = Mallocator.make!Mutex();
+        get_block_mutex.initialize();
+        block_stack_mutex.initialize();
+
+
+        threads = Mallocator.makeArray!Thread(12);
+        foreach(ref Thread thread;threads)
+        {
+            thread.block = getBlock();
+        }
     }
 
     private static void __initialize_gl(ref Renderer this_)
@@ -172,16 +267,16 @@ struct Renderer
                 case Technique.vbo_batch:
                     batch_vbo[0].create();
                     batch_ibo[0].create();
-                    batch_vbo[0].bufferData(Buffer.BindTarget.array,16,4*MaxObjects,BufferUsage,null);
+                    batch_vbo[0].bufferData(Buffer.BindTarget.array,14,4*MaxObjects,BufferUsage,null);
                     batch_ibo[0].bufferData(Buffer.BindTarget.element_array,2,6*MaxObjects,BufferUsage,null);
 
                     batch_vbo[1].create();
                     batch_ibo[1].create();
-                    batch_vbo[1].bufferData(Buffer.BindTarget.array,16,4*MaxObjects,BufferUsage,null);
+                    batch_vbo[1].bufferData(Buffer.BindTarget.array,14,4*MaxObjects,BufferUsage,null);
                     batch_ibo[1].bufferData(Buffer.BindTarget.element_array,2,6*MaxObjects,BufferUsage,null);
 
-                    batch_vertices = Mallocator.makeArray!float(16*MaxObjects);
-                    batch_indices = Mallocator.makeArray!ushort(6*MaxObjects);
+                    //batch_vertices = Mallocator.makeArray!ubyte(14*4*MaxObjects);
+                    //batch_indices = Mallocator.makeArray!ushort(6*MaxObjects);
                     break;
                 case Technique.instanced_attrib_divisor:
                     goto case(Technique.uniform_buffer_indexed);
@@ -285,7 +380,7 @@ struct Renderer
             SDL_Log("Uniform block max size: %u",block_max_size);
             SDL_Log("Data offset: %u",data_offset);
 
-            allocator = BlockAllocator(1245184, 32);
+            allocator = BlockAllocator(block_size, 32);
         }
     }
 
@@ -296,7 +391,7 @@ struct Renderer
 
     void draw(Texture tex, vec2 pos, vec2 size, vec4 coords, short depth = 0, uint color = uint.max, float angle = 0, uint material_id = 0, uint mesh_id = 0, uint thread_id = 0)
     {
-        if(item_id >= MaxObjects)return;
+        if(prepared_items >= MaxObjects)return;
         __draw(this,tex,pos,size,coords,depth,color,angle,material_id,mesh_id,thread_id);
     }
 
@@ -364,12 +459,14 @@ struct Renderer
             
             data_index += data_offset;
             item_id++;
+            prepared_items++;
         }
     }
 
     private static void __draw_gl_vbo_batch(ref Renderer this_, Texture tex, vec2 pos, vec2 size, vec4 coords, short depth, uint color, float angle, uint material_id, uint mesh_id, uint thread_id = 0)
     {
         import ecs_utils.gfx.config;
+        short[3] mem = [depth, *cast(short*)&color, *(cast(short*)&color + 1)];
         //import core.stdc.string;
         with(this_)
         {
@@ -389,16 +486,37 @@ struct Renderer
             memcpy(ptr+16,pos.data.ptr,8);
             memcpy(ptr+32,coords.data.ptr,16);*/
 
+            short[] verts = cast(short[])threads[thread_id].block.batch_vertices;
+            uint item_id = threads[thread_id].block.items;
+
             if(angle == 0)
             {
-                batch_vertices[item_id*16]    = GfxConfig.meshes[mesh_id].vertices[0] * size.x + pos.x;
-                batch_vertices[item_id*16+1]  = GfxConfig.meshes[mesh_id].vertices[1] * size.y + pos.y;
-                batch_vertices[item_id*16+4]  = GfxConfig.meshes[mesh_id].vertices[4] * size.x + pos.x;
-                batch_vertices[item_id*16+5]  = GfxConfig.meshes[mesh_id].vertices[5] * size.y + pos.y;
-                batch_vertices[item_id*16+8]  = GfxConfig.meshes[mesh_id].vertices[8] * size.x + pos.x;
-                batch_vertices[item_id*16+9]  = GfxConfig.meshes[mesh_id].vertices[9] * size.y + pos.y;
-                batch_vertices[item_id*16+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x + pos.x;
-                batch_vertices[item_id*16+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y + pos.y;
+                verts[item_id*28]    = cast(short)((GfxConfig.meshes[mesh_id].vertices[0] * size.x + pos.x) * 8191);
+                verts[item_id*28+1]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[1] * size.y + pos.y) * 8191);
+                verts[item_id*28+2]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x)*32767);
+                verts[item_id*28+3]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y)*32767);
+                memcpy(verts.ptr+item_id*28+4,mem.ptr,6);
+
+
+                verts[item_id*28+7]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[4] * size.x + pos.x) * 8191);
+                verts[item_id*28+8]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[5] * size.y + pos.y) * 8191);
+                verts[item_id*28+9]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x)*32767);
+                verts[item_id*28+10]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y)*32767);
+                memcpy(verts.ptr+item_id*28+11,mem.ptr,6);
+
+
+                verts[item_id*28+14]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[8] * size.x + pos.x) * 8191);
+                verts[item_id*28+15]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[9] * size.y + pos.y) * 8191);
+                verts[item_id*28+16] = cast(short)((GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x)*32767);
+                verts[item_id*28+17] = cast(short)((GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y)*32767);
+                memcpy(verts.ptr+item_id*28+18,mem.ptr,6);
+
+
+                verts[item_id*28+21] = cast(short)((GfxConfig.meshes[mesh_id].vertices[12] * size.x + pos.x) * 8191);
+                verts[item_id*28+22] = cast(short)((GfxConfig.meshes[mesh_id].vertices[13] * size.y + pos.y) * 8191);
+                verts[item_id*28+23] = cast(short)((GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x)*32767);
+                verts[item_id*28+24] = cast(short)((GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y)*32767);
+                memcpy(verts.ptr+item_id*28+25,mem.ptr,6);
             }
             else
             {
@@ -406,50 +524,72 @@ struct Renderer
                 float sinn = sinf(angle);
                 float coss = cosf(angle);
 
-                /*batch_vertices[item_id*16]    = GfxConfig.meshes[mesh_id].vertices[0]  * size.x;
-                batch_vertices[item_id*16+1]  = GfxConfig.meshes[mesh_id].vertices[1]  * size.y;
-                batch_vertices[item_id*16+4]  = GfxConfig.meshes[mesh_id].vertices[4]  * size.x;
-                batch_vertices[item_id*16+5]  = GfxConfig.meshes[mesh_id].vertices[5]  * size.y;
-                batch_vertices[item_id*16+8]  = GfxConfig.meshes[mesh_id].vertices[8]  * size.x;
-                batch_vertices[item_id*16+9]  = GfxConfig.meshes[mesh_id].vertices[9]  * size.y;
-                batch_vertices[item_id*16+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x;
-                batch_vertices[item_id*16+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y;*/
+                /*batch_vertices[item_id*28]    = GfxConfig.meshes[mesh_id].vertices[0]  * size.x;
+                batch_vertices[item_id*28+1]  = GfxConfig.meshes[mesh_id].vertices[1]  * size.y;
+                batch_vertices[item_id*28+4]  = GfxConfig.meshes[mesh_id].vertices[4]  * size.x;
+                batch_vertices[item_id*28+5]  = GfxConfig.meshes[mesh_id].vertices[5]  * size.y;
+                batch_vertices[item_id*28+8]  = GfxConfig.meshes[mesh_id].vertices[8]  * size.x;
+                batch_vertices[item_id*28+9]  = GfxConfig.meshes[mesh_id].vertices[9]  * size.y;
+                batch_vertices[item_id*28+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x;
+                batch_vertices[item_id*28+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y;*/
 
-                batch_vertices[item_id*16]    = (GfxConfig.meshes[mesh_id].vertices[0]  * coss + GfxConfig.meshes[mesh_id].vertices[1]  * sinn) * size.x + pos.x;
-                batch_vertices[item_id*16+1]  = (GfxConfig.meshes[mesh_id].vertices[1]  * coss - GfxConfig.meshes[mesh_id].vertices[0]  * sinn) * size.y + pos.y;
-                batch_vertices[item_id*16+4]  = (GfxConfig.meshes[mesh_id].vertices[4]  * coss + GfxConfig.meshes[mesh_id].vertices[5]  * sinn) * size.x + pos.x;
-                batch_vertices[item_id*16+5]  = (GfxConfig.meshes[mesh_id].vertices[5]  * coss - GfxConfig.meshes[mesh_id].vertices[4]  * sinn) * size.y + pos.y;
-                batch_vertices[item_id*16+8]  = (GfxConfig.meshes[mesh_id].vertices[8]  * coss + GfxConfig.meshes[mesh_id].vertices[9]  * sinn) * size.x + pos.x;
-                batch_vertices[item_id*16+9]  = (GfxConfig.meshes[mesh_id].vertices[9]  * coss - GfxConfig.meshes[mesh_id].vertices[8]  * sinn) * size.y + pos.y;
-                batch_vertices[item_id*16+12] = (GfxConfig.meshes[mesh_id].vertices[12] * coss + GfxConfig.meshes[mesh_id].vertices[13] * sinn) * size.x + pos.x;
-                batch_vertices[item_id*16+13] = (GfxConfig.meshes[mesh_id].vertices[13] * coss - GfxConfig.meshes[mesh_id].vertices[12] * sinn) * size.y + pos.y;
+                verts[item_id*28]    = cast(short)(((GfxConfig.meshes[mesh_id].vertices[0]  * coss + GfxConfig.meshes[mesh_id].vertices[1]  * sinn) * size.x + pos.x) * 8191);
+                verts[item_id*28+1]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[1]  * coss - GfxConfig.meshes[mesh_id].vertices[0]  * sinn) * size.y + pos.y) * 8191);
+                verts[item_id*28+7]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[4]  * coss + GfxConfig.meshes[mesh_id].vertices[5]  * sinn) * size.x + pos.x) * 8191);
+                verts[item_id*28+8]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[5]  * coss - GfxConfig.meshes[mesh_id].vertices[4]  * sinn) * size.y + pos.y) * 8191);
+                verts[item_id*28+14]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[8]  * coss + GfxConfig.meshes[mesh_id].vertices[9]  * sinn) * size.x + pos.x) * 8191);
+                verts[item_id*28+15]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[9]  * coss - GfxConfig.meshes[mesh_id].vertices[8]  * sinn) * size.y + pos.y) * 8191);
+                verts[item_id*28+21] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[12] * coss + GfxConfig.meshes[mesh_id].vertices[13] * sinn) * size.x + pos.x) * 8191);
+                verts[item_id*28+22] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[13] * coss - GfxConfig.meshes[mesh_id].vertices[12] * sinn) * size.y + pos.y) * 8191);
             }
 
-            batch_vertices[item_id*16+2]  = GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x;
-            batch_vertices[item_id*16+3]  = GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y;
-            batch_vertices[item_id*16+6]  = GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x;
-            batch_vertices[item_id*16+7]  = GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y;
-            batch_vertices[item_id*16+10] = GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x;
-            batch_vertices[item_id*16+11] = GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y;
-            batch_vertices[item_id*16+14] = GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x;
-            batch_vertices[item_id*16+15] = GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y;
+            /*verts[item_id*28+2]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x)*32767);
+            verts[item_id*28+3]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y)*32767);
+            verts[item_id*28+9]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x)*32767);
+            verts[item_id*28+10]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y)*32767);
+            verts[item_id*28+16] = cast(short)((GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x)*32767);
+            verts[item_id*28+17] = cast(short)((GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y)*32767);
+            verts[item_id*28+23] = cast(short)((GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x)*32767);
+            verts[item_id*28+24] = cast(short)((GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y)*32767);*/
 
-            uint ind_id = item_id % batch_size;
+            /*verts[item_id*28+4] = depth;
+            verts[item_id*28+11] = depth;
+            verts[item_id*28+18] = depth;
+            verts[item_id*28+25] = depth;
 
-            batch_indices[item_id*6]   = cast(ushort)(GfxConfig.meshes[mesh_id].indices[0] + ind_id*4);
-            batch_indices[item_id*6+1] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[1] + ind_id*4);
-            batch_indices[item_id*6+2] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[2] + ind_id*4);
-            batch_indices[item_id*6+3] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[3] + ind_id*4);
-            batch_indices[item_id*6+4] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[4] + ind_id*4);
-            batch_indices[item_id*6+5] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[5] + ind_id*4);
+            *cast(uint*)&verts[item_id*28+5] = color;
+            *cast(uint*)&verts[item_id*28+12] = color;
+            *cast(uint*)&verts[item_id*28+19] = color;
+            *cast(uint*)&verts[item_id*28+26] = color;
+            
+            memcpy(verts.ptr+item_id*28+4,mem.ptr,6);
+            memcpy(verts.ptr+item_id*28+11,mem.ptr,6);
+            memcpy(verts.ptr+item_id*28+18,mem.ptr,6);
+            memcpy(verts.ptr+item_id*28+25,mem.ptr,6);*/
+
+            uint ind_id = (item_id % batch_size)*4;
+
+            ushort[] indices = threads[thread_id].block.batch_indices;
+
+            indices[item_id*6]   = cast(ushort)(GfxConfig.meshes[mesh_id].indices[0] + ind_id);
+            indices[item_id*6+1] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[1] + ind_id);
+            indices[item_id*6+2] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[2] + ind_id);
+            indices[item_id*6+3] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[3] + ind_id);
+            indices[item_id*6+4] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[4] + ind_id);
+            indices[item_id*6+5] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[5] + ind_id);
 
             //render_list[item_id] = RenderData(tex,material_id,mesh_id);
-            render_list[item_id].texture = tex;
-            render_list[item_id].material_id = material_id;
-            render_list[item_id].mesh_id = mesh_id;
+            //render_list[item_id].texture = tex;
+            //render_list[item_id].material_id = material_id;
+            //render_list[item_id].mesh_id = mesh_id;
             
             //data_index += 1;//data_offset;
-            item_id++;
+            threads[thread_id].block.items++;
+            if(threads[thread_id].block.items >= VertexBlock.max_items)
+            {
+                pushBlock(threads[thread_id].block);
+                threads[thread_id].block = getBlock();
+            }
         }
     }
 
@@ -467,9 +607,22 @@ struct Renderer
     {
         glClearColor(0,0,0,0);
         glViewport(0,0,this_.resolution.x,this_.resolution.y);
-        glClear(GL_COLOR_BUFFER_BIT);// | GL_DEPTH_BUFFER_BIT);
-        glDisable(GL_DEPTH_TEST);
+        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+        //glDisable(GL_DEPTH_TEST);
+        glEnable(GL_DEPTH_TEST);
         glDisable(GL_CULL_FACE);
+        glDepthFunc(GL_LESS);
+
+        version(WebAssembly)
+        {
+            glDepthRangef(0,1);
+        }
+        else
+        {
+            glDepthRange(0,1);
+        }
+        //glDepthRange(0,1);
+        //glClearDepth(1);
     }
 
     void present()
@@ -484,6 +637,10 @@ struct Renderer
 
     private static void __present_gl(ref Renderer this_)
     {
+
+        this_.pushThreadsBlocks();
+        this_.pushData();
+
         glViewport(0,0,this_.resolution.x,this_.resolution.y);
         //glEnable(GL_ALPHA_TEST);
         //glAlphaFunc(GL_GREATER, 0.01);
@@ -505,14 +662,17 @@ struct Renderer
                     break;
                 case Technique.vbo_batch:
                 //if(data_index){
-                    batch_vbo[0].bufferSubData(Buffer.BindTarget.array,item_id*4*16,0,batch_vertices.ptr);
-                    batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,item_id*6*2,0,batch_indices.ptr);
+                    //batch_vbo[0].bufferSubData(Buffer.BindTarget.array,item_id*4*14,0,batch_vertices.ptr);
+                    //batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,item_id*6*2,0,batch_indices.ptr);
 
                     batch_vbo[0].bind(Buffer.BindTarget.array);
                     batch_ibo[0].bind(Buffer.BindTarget.element_array);
 
-                    glVertexAttribPointer(0,2,GL_FLOAT,false,16,null);
-                    glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)8);//}
+                    //glVertexAttribPointer(0,2,GL_SHORT,true,14,null);
+                    //glVertexAttribPointer(1,2,GL_SHORT,true,14,cast(void*)4);//}
+                    glEnableVertexAttribArray(2);
+                    glEnableVertexAttribArray(3);
+                    //glVertexAttribPointer(2,1,GL_SHORT,true,14,cast(void*)6);//}
                     break;
                 case Technique.instanced_attrib_divisor:
                     ubos[0].bufferSubData(Buffer.BindTarget.uniform,data_index,0,uniform_block.ptr);
@@ -609,8 +769,8 @@ struct Renderer
             //glBeginQuery(GL_TIME_ELAPSED, time_queries[0]);
             if(technique == Technique.vbo_batch)
             {
-                uint items = item_id/batch_size+1;
-                foreach(i; 0..items)
+                //uint items = item_id/batch_size+1;
+                foreach(i; 0..draw_list.length)
                 {
                     if(material_id != render_list[i].material_id)
                     {
@@ -625,16 +785,20 @@ struct Renderer
                         render_list[i].texture.bind();
                     }
 
-                    uint instance_count = batch_size;
+                    /*uint instance_count = batch_size;
                     if((i+1)*batch_size > item_id)
                     {
                         instance_count = item_id%batch_size;
-                    }
+                    }*/
 
-                    glVertexAttribPointer(0,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16));
-                    glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16+8));
+                    // glVertexAttribPointer(0,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16));
+                    // glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16+8));
+                    glVertexAttribPointer(0,2,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14));
+                    glVertexAttribPointer(1,2,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14+4));
+                    glVertexAttribPointer(2,1,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14+8));
+                    glVertexAttribPointer(3,4,GL_UNSIGNED_BYTE,true,14,cast(void*)(draw_list[i].start*4*14+10));
 
-                    glDrawElements(GL_TRIANGLES,instance_count*6,GL_UNSIGNED_SHORT,cast(void*)(i*batch_size*6*2));
+                    glDrawElements(GL_TRIANGLES,draw_list[i].count*6,GL_UNSIGNED_SHORT,cast(void*)(draw_list[i].start*6*2));
 
                     //glDrawElementsBaseVertex(GL_TRIANGLES,instance_count*6,GL_UNSIGNED_SHORT,cast(void*)(i*16_384*6*2),i*16_384*4);
                 }
@@ -817,6 +981,9 @@ struct Renderer
         }
         glDisableVertexAttribArray(0);
         glDisableVertexAttribArray(1);
+        glDisableVertexAttribArray(2);
+        glDisableVertexAttribArray(3);
+        this_.freeBlocks();
         /*glUseProgram(0);
         glBindBuffer(GL_ARRAY_BUFFER, 0);
         glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);*/