Improved demo and multithreaded rendering:

-added support for multithreaded rendering (fast) -improved shaders -added support for rendering depth -added support for rendering color -improved DeptThreadPool (dynamic setting of the number of tryWait calls before timedWait; low CPU usage with high responsiveness) -added possibility to change the number of threads
2020-05-09 19:41:00 +02:00 · 2020-05-09 19:41:00 +02:00 · c94510a487
commit c94510a487
parent f6e7af1014
8 changed files with 311 additions and 105 deletions
--- a/demos/assets/shaders/base.fp
+++ b/demos/assets/shaders/base.fp
@ -50,6 +50,6 @@ uniform sampler2D tex;

 void main() 
 {
-    gl_FragColor  = TEX(tex,uv);// * color;
+    gl_FragColor  = TEX(tex,uv) * color;
 	if(gl_FragColor.a < 0.01)discard;
 }
--- a/demos/assets/shaders/base.vp
+++ b/demos/assets/shaders/base.vp
@ -70,7 +70,7 @@ precision lowp samplerCube;
 	#endif
 #endif*/

-
+#define VBO_BATCH 1

 M_OUT vec2 uv;
 L_OUT vec4 color;
@ -91,12 +91,16 @@ LOC(1) ATT vec2 tex_coords;
 #endif

 void main() {
+	#ifdef VBO_BATCH
+        vec3 position = vec3(positions*4,1.0);
+        uv = tex_coords;
+    #else
+        vec3 position = mat3(matrix_1.x,matrix_1.y,0,matrix_1.z,matrix_1.w,0,matrix_2.xy,1) * vec3(positions,1.0);
+        uv = tex_coords * uv_transform.zw + uv_transform.xy;
+    #endif
 	
-	vec3 position = mat3(matrix_1.x,matrix_1.y,0,matrix_1.z,matrix_1.w,0,matrix_2.xy,1) * vec3(positions,1.0);
-    position.z = depth;
-	uv = tex_coords * uv_transform.zw + uv_transform.xy;
 	color = vcolor;
    
-	gl_Position = vec4(position.xy,0,1.0);
+	gl_Position = vec4(position.xy,depth,1.0);
 	
 }
--- a/demos/external/sources/mmutils/thread_pool.d
+++ b/demos/external/sources/mmutils/thread_pool.d
@ -9,13 +9,14 @@ import bubel.ecs.atomic;
 //import std.stdio;
 import std.algorithm : map;

-//version = MM_NO_LOGS; // Disable log creation
+version = MM_NO_LOGS; // Disable log creation
 //version = MM_USE_POSIX_THREADS; // Use posix threads insted of standard library, required for betterC

 version (Posix)version = MM_USE_POSIX_THREADS;

 version (WebAssembly)
 {
+	version = MM_NO_LOGS;
 	extern(C) struct FILE
 	{
 		
@ -799,6 +800,7 @@ struct ThreadPool
 	alias FlushLogsDelegaste = void delegate(ThreadData* threadData, JobLog[] logs); /// Type of delegate to flush logs
 	FlushLogsDelegaste onFlushLogs; /// User custom delegate to flush logs, if overriden defaultFlushLogs will be used. Can be sset after initialize() call
 	int logsCacheNum; /// Number of log cache entries. Should be set before setThreadsNum is called
+	int tryWaitCount = 2000; ///Number of times which tryWait are called before timedWait call. Higher value sets better response but takes CPU time even if there are no jobs.
 private:
 	ThreadData*[gMaxThreadsNum] threadsData; /// Data for threads
 	align(64) shared int threadsNum; /// Number of threads currentlu accepting jobs
@ -1455,7 +1457,7 @@ private void threadFunc(ThreadData* threadData)
 				while(!threadData.semaphore.tryWait())
 				{
 					tryWait++;
-					if(tryWait>5000)
+					if(tryWait>threadPool.tryWaitCount)
 					{
 						ok = false;
 						break;
--- a/demos/source/app.d
+++ b/demos/source/app.d
@ -58,6 +58,7 @@ struct Launcher
    uint style = 3;
    uint entities_count;
    bool multithreading;
+    int threads;
    ulong timer_freq;
    double delta_time;
    uint fps;
@ -253,13 +254,12 @@ void mainLoop(void* arg)
    if(launcher.tool && launcher.tool_repeat != 0 && launcher.mouse.left && !igIsWindowHovered(ImGuiHoveredFlags_AnyWindow) && !igIsWindowFocused(ImGuiFocusedFlags_AnyWindow))
    {
        float range = 500.0 / cast(float)launcher.tool_repeat;
-        launcher.repeat_time += launcher.delta_time*100;
+        launcher.repeat_time += launcher.delta_time;
        while(launcher.repeat_time > range)
        {
            launcher.repeat_time -= range;
            launcher.tool((launcher.mouse.position*launcher.scalling)-launcher.render_position, launcher.used_tool, launcher.tool_size);
        }
-        
    }

    version(WebAssembly)
@ -317,6 +317,14 @@ void mainLoop(void* arg)
            {
                launcher.multithreading = !launcher.multithreading;
            }
+            igSetNextItemWidth(0);
+            igLabelText("Threads:",null);
+            igSameLine(0,4);
+            if(igSliderInt("##Threads",&launcher.threads, 1, 12, null))//"Multithreading", null, launcher.multithreading, true))
+            {
+                launcher.job_updater.pool.setThreadsNum(launcher.threads);
+                //launcher.threads = !launcher.multithreading;
+            }
            if(igBeginMenu("Show",true))
            {
                if(igMenuItemBool("Statistics",null,launcher.show_stat_wnd,true))
@ -539,11 +547,14 @@ void mainLoop(void* arg)
    launcher.renderer.clear();

    double loop_time = launcher.getTime();
+    launcher.job_updater.pool.tryWaitCount = 5000;
    if(launcher.loop && !launcher.loop())
    {   
        quit();
        *cast(bool*)arg = false;
    }
+    launcher.job_updater.pool.tryWaitCount = 10;
+
    loop_time = launcher.getTime() - loop_time;

    double draw_time = launcher.getTime();
@ -785,7 +796,15 @@ void loadGFX()
    GfxConfig.materials[0].compile();
    GfxConfig.materials[0].bindAttribLocation("positions",0);
    GfxConfig.materials[0].bindAttribLocation("tex_coords",1);
+    GfxConfig.materials[0].bindAttribLocation("depth",2);
+    GfxConfig.materials[0].bindAttribLocation("vcolor",3);
    GfxConfig.materials[0].link();
+
+   /* import std.stdio;
+    writeln("positions ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"positions"));
+    writeln("tex_coords ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"tex_coords"));
+    writeln("depth ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"depth"));
+    writeln("vcolor ",glGetAttribLocation(GfxConfig.materials[0].data.modules[0].gl_handle,"vcolor"));*/
    
    GfxConfig.materials[0].data.uniforms = Mallocator.makeArray!(Material.Uniform)(3);
    GfxConfig.materials[0].data.uniforms[0] = Material.Uniform(Material.Type.float4, GfxConfig.materials[0].getLocation("matrix_1"), 0);
--- a/demos/source/demos/simple.d
+++ b/demos/source/demos/simple.d
@ -47,23 +47,25 @@ struct CTexture

 struct DrawSystem
 {
-    mixin ECS.System!1;
+    mixin ECS.System!32;

    struct EntitiesData
    {
        uint length;
+        uint thread_id;
        @readonly CTexture[] textures;
        @readonly CLocation[] locations;
    }

    void onUpdate(EntitiesData data)
    {
-        if(launcher.renderer.item_id >= launcher.renderer.MaxObjects)return;//simple leave loop if max visible objects count was reached
+        if(launcher.renderer.prepared_items >= launcher.renderer.MaxObjects)return;//simple leave loop if max visible objects count was reached
        foreach(i; 0..data.length)
        {
-            launcher.renderer.draw(data.textures[i].tex, data.locations[i].location, vec2(16,16), vec4(0,0,1,1), 0, 0 , 0);
+            launcher.renderer.draw(data.textures[i].tex, data.locations[i].location, vec2(16,16), vec4(0,0,1,1), cast(ushort)(data.locations[i].y*64+data.locations[i].x), uint.max, 0, 0, 0, data.thread_id);
            //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1));
        }
+        if(data.thread_id == 0)launcher.renderer.pushData();
    }
 }

--- a/demos/source/demos/snake.d
+++ b/demos/source/demos/snake.d
@ -607,7 +607,7 @@ struct DrawAppleSystem
    {
        foreach(i; 0..data.location.length)
        {
-            launcher.renderer.draw(snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,32*px,16*px,16*px), 0, 0 , 0);
+            launcher.renderer.draw(snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,32*px,16*px,16*px), 0, uint.max, 0);
        }
    }
 }
@ -677,16 +677,16 @@ struct DrawSnakeSystem
    {
        final switch(cast(ubyte)part)
        {
-            case SnakePart.tail_up:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,112,16,16)*px, 0, 0, 0);break;
-            case SnakePart.tail_down:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,112,16,16)*px, 0, 0, 0);break;
-            case SnakePart.tail_left:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,112,16,16)*px, 0, 0, 0);break;
-            case SnakePart.tail_right:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,144,16,16)*px, 0, 0, 0);break;
-            case SnakePart.turn_ld:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,128,16,16)*px, 0, 0, 0);break;
-            case SnakePart.turn_lu:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,144,16,16)*px, 0, 0, 0);break;
-            case SnakePart.turn_rd:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,144,16,16)*px, 0, 0, 0);break;
-            case SnakePart.turn_ru:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,112,16,16)*px, 0, 0, 0);break;
-            case SnakePart.vertical:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,128,16,16)*px, 0, 0, 0);break;
-            case SnakePart.horizontal:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(48,128,16,16)*px, 0, 0, 0);break;
+            case SnakePart.tail_up:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,112,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.tail_down:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,112,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.tail_left:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,112,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.tail_right:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(0,144,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.turn_ld:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,128,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.turn_lu:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(32,144,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.turn_rd:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,144,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.turn_ru:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(64,112,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.vertical:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(16,128,16,16)*px, 0, uint.max, 0);break;
+            case SnakePart.horizontal:launcher.renderer.draw(.snake.texture, cast(vec2)loc, vec2(16,16), vec4(48,128,16,16)*px, 0, uint.max, 0);break;
        }
    }

@ -698,10 +698,10 @@ struct DrawSnakeSystem
            scope vec2 loc = cast(vec2)(data.location[i].location * 16);
            final switch(snake.direction)
            {
-                case CMovement.Direction.up:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,112,16,16)*px, 0, 0 , 0);break;
-                case CMovement.Direction.down:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,144,16,16)*px, 0, 0 , 0);break;
-                case CMovement.Direction.left:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,128,16,16)*px, 0, 0 , 0);break;
-                case CMovement.Direction.right:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(32,128,16,16)*px, 0, 0 , 0);break;
+                case CMovement.Direction.up:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,112,16,16)*px, 0, uint.max, 0);break;
+                case CMovement.Direction.down:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(48,144,16,16)*px, 0, uint.max, 0);break;
+                case CMovement.Direction.left:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(0,128,16,16)*px, 0, uint.max, 0);break;
+                case CMovement.Direction.right:launcher.renderer.draw(.snake.texture, vec2(data.location[i].x*16,data.location[i].y*16), vec2(16,16), vec4(32,128,16,16)*px, 0, uint.max, 0);break;
            }
            if(snake.parts.length >1)
            {
--- a/demos/source/demos/space_invaders.d
+++ b/demos/source/demos/space_invaders.d
@ -183,6 +183,8 @@ struct CDepth
 {
    mixin ECS.Component;

+    alias depth this;
+
    short depth;
 }

@ -208,23 +210,33 @@ struct EChangeDirection

 struct DrawSystem
 {
-    mixin ECS.System!1;
+    mixin ECS.System!32;

    struct EntitiesData
    {
        uint length;
+        uint thread_id;
        @readonly CTexture[] textures;
        @readonly CLocation[] locations;
        @readonly CScale[] scale;
+        @readonly @optional CDepth[] depth;
    }

    void onUpdate(EntitiesData data)
    {
-        foreach(i; 0..data.length)
-        {
-            launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, 0, 0 , 0);
-            //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1));
-        }
+        if(!data.depth)
+            foreach(i; 0..data.length)
+            {
+                launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, cast(short)data.locations[i].y, uint.max, 0, 0, 0, data.thread_id);
+                //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1));
+            }
+        else
+            foreach(i; 0..data.length)
+            {
+                launcher.renderer.draw(data.textures[i].tex, data.locations[i].value, data.scale[i], data.textures[i].coords, cast(short)(data.depth[i] * 64 + data.locations[i].y), uint.max, 0, 0, 0, data.thread_id);
+                //draw(renderer, data.textures[i].tex, data.locations[i], vec2(32,32), vec4(0,0,1,1));
+            }
+        if(data.thread_id == 0)launcher.renderer.pushData();
    }
 }

--- a/demos/utils/source/ecs_utils/gfx/renderer.d
+++ b/demos/utils/source/ecs_utils/gfx/renderer.d
@ -11,8 +11,8 @@ import ecs_utils.math.vector;

 import bubel.ecs.block_allocator;
 import bubel.ecs.vector;
-
-import glad.gl.gl;
+version(WebAssembly)import glad.gl.gles2;
+else import glad.gl.gl;

 version = ver1;
 /*version(ver5)version = vv2;
@ -57,31 +57,105 @@ struct Renderer
    vec2 view_pos = vec2(-1,-1);
    vec2 view_size = vec2(1,1);

-    const uint batch_size = 16_384;
+    enum block_size = 2^^16;
+    enum batch_size  = block_size/68;//963;//16_384;
    //uint[2] time_queries;

    struct VertexBlock
    {
-        float[] batch_vertices;
+        enum max_items = batch_size;//963;
+        byte[] batch_vertices;
        ushort[] batch_indices;
        void* memory;
-        uint itmes = 0;
+        uint items = 0;
    }

+    Mutex* get_block_mutex;
+    Mutex* block_stack_mutex;
+
    VertexBlock getBlock()
    {
        VertexBlock block;
+        get_block_mutex.lock();
        block.memory = allocator.getBlock();
-        block.batch_vertices = (cast(float*)block.memory)[0 .. 1];
+        get_block_mutex.unlock();
+        block.batch_vertices = (cast(byte*)block.memory)[0 .. VertexBlock.max_items * 4 * 14];
+        block.batch_indices = (cast(ushort*)block.memory)[VertexBlock.max_items * 4 * 7 .. VertexBlock.max_items * (4 * 7 + 6)];
        return block;
    }

+    Vector!VertexBlock blocks;
+    uint current_block = 0;
+    uint render_blocks = 0;
+
+    void pushBlock(VertexBlock block)
+    {
+        block_stack_mutex.lock();
+        prepared_items += block.items;
+        blocks.add(block);
+        render_blocks++;
+        block_stack_mutex.unlock();
+    }
+
+    bool isRemainingBlocks()
+    {
+        if(render_blocks <= current_block)return false;
+        return true;
+    }
+
+    VertexBlock fetchBlock()
+    {
+        block_stack_mutex.lock();
+        VertexBlock block = blocks[current_block];
+        current_block++;
+        block_stack_mutex.unlock();
+        return block;
+    }
+
+    void freeBlocks()
+    {
+        block_stack_mutex.lock();
+        render_blocks = 0;
+        current_block = 0;
+        foreach(VertexBlock block; blocks)
+        {
+            allocator.freeBlock(block.memory);
+        }
+        blocks.clear;
+        prepared_items=0;
+        draw_list.clear();
+        block_stack_mutex.unlock();
+    }
+
+    void pushData()
+    {
+        //if(!isRemainingBlocks())return;
+        while(isRemainingBlocks())
+        {
+            VertexBlock block = fetchBlock();
+            uint items = block.items;
+            if(items + item_id >= MaxObjects)items = MaxObjects - item_id;
+            batch_vbo[0].bufferSubData(Buffer.BindTarget.array,items*4*14,item_id*4*14,block.batch_vertices.ptr);
+            batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,items*2*6,item_id*2*6,block.batch_indices.ptr);
+            draw_list.add(DrawCall(item_id,items));
+            item_id += items;
+        }
+    }
+
+    void pushThreadsBlocks()
+    {
+        foreach(i, ref Thread thread; threads)
+        {
+            pushBlock(thread.block);
+            thread.block = getBlock();
+        }
+    }

    struct Thread
    {
-
-        Vector!VertexBlock block;
+        //Vector!VertexBlock block;
        RenderData[] render_list;
+        VertexBlock block;
    }
    Thread[] threads;

@ -102,7 +176,7 @@ struct Renderer
    Buffer[2] batch_vbo;
    Buffer[2] batch_ibo;

-    float[] batch_vertices;
+    ubyte[] batch_vertices;
    ushort[] batch_indices;

    Buffer indirect_buffer;
@ -121,8 +195,17 @@ struct Renderer
        uint mesh_id;
    }

+    struct DrawCall
+    {
+        uint start;
+        uint count;
+    }
+
+    Vector!DrawCall draw_list;
+
    RenderData[] render_list;
    uint item_id;
+    uint prepared_items;

    uint[] multi_count;
    uint[] multi_offset;
@ -140,6 +223,18 @@ struct Renderer
    {
        //this.technique = __ecs_used_technique;
        __initialize(this);
+
+        get_block_mutex = Mallocator.make!Mutex();
+        block_stack_mutex = Mallocator.make!Mutex();
+        get_block_mutex.initialize();
+        block_stack_mutex.initialize();
+
+
+        threads = Mallocator.makeArray!Thread(12);
+        foreach(ref Thread thread;threads)
+        {
+            thread.block = getBlock();
+        }
    }

    private static void __initialize_gl(ref Renderer this_)
@ -172,16 +267,16 @@ struct Renderer
                case Technique.vbo_batch:
                    batch_vbo[0].create();
                    batch_ibo[0].create();
-                    batch_vbo[0].bufferData(Buffer.BindTarget.array,16,4*MaxObjects,BufferUsage,null);
+                    batch_vbo[0].bufferData(Buffer.BindTarget.array,14,4*MaxObjects,BufferUsage,null);
                    batch_ibo[0].bufferData(Buffer.BindTarget.element_array,2,6*MaxObjects,BufferUsage,null);

                    batch_vbo[1].create();
                    batch_ibo[1].create();
-                    batch_vbo[1].bufferData(Buffer.BindTarget.array,16,4*MaxObjects,BufferUsage,null);
+                    batch_vbo[1].bufferData(Buffer.BindTarget.array,14,4*MaxObjects,BufferUsage,null);
                    batch_ibo[1].bufferData(Buffer.BindTarget.element_array,2,6*MaxObjects,BufferUsage,null);

-                    batch_vertices = Mallocator.makeArray!float(16*MaxObjects);
-                    batch_indices = Mallocator.makeArray!ushort(6*MaxObjects);
+                    //batch_vertices = Mallocator.makeArray!ubyte(14*4*MaxObjects);
+                    //batch_indices = Mallocator.makeArray!ushort(6*MaxObjects);
                    break;
                case Technique.instanced_attrib_divisor:
                    goto case(Technique.uniform_buffer_indexed);
@ -285,7 +380,7 @@ struct Renderer
            SDL_Log("Uniform block max size: %u",block_max_size);
            SDL_Log("Data offset: %u",data_offset);

-            allocator = BlockAllocator(1245184, 32);
+            allocator = BlockAllocator(block_size, 32);
        }
    }

@ -296,7 +391,7 @@ struct Renderer

    void draw(Texture tex, vec2 pos, vec2 size, vec4 coords, short depth = 0, uint color = uint.max, float angle = 0, uint material_id = 0, uint mesh_id = 0, uint thread_id = 0)
    {
-        if(item_id >= MaxObjects)return;
+        if(prepared_items >= MaxObjects)return;
        __draw(this,tex,pos,size,coords,depth,color,angle,material_id,mesh_id,thread_id);
    }

@ -364,12 +459,14 @@ struct Renderer
            
            data_index += data_offset;
            item_id++;
+            prepared_items++;
        }
    }

    private static void __draw_gl_vbo_batch(ref Renderer this_, Texture tex, vec2 pos, vec2 size, vec4 coords, short depth, uint color, float angle, uint material_id, uint mesh_id, uint thread_id = 0)
    {
        import ecs_utils.gfx.config;
+        short[3] mem = [depth, *cast(short*)&color, *(cast(short*)&color + 1)];
        //import core.stdc.string;
        with(this_)
        {
@ -389,16 +486,37 @@ struct Renderer
            memcpy(ptr+16,pos.data.ptr,8);
            memcpy(ptr+32,coords.data.ptr,16);*/

+            short[] verts = cast(short[])threads[thread_id].block.batch_vertices;
+            uint item_id = threads[thread_id].block.items;
+
            if(angle == 0)
            {
-                batch_vertices[item_id*16]    = GfxConfig.meshes[mesh_id].vertices[0] * size.x + pos.x;
-                batch_vertices[item_id*16+1]  = GfxConfig.meshes[mesh_id].vertices[1] * size.y + pos.y;
-                batch_vertices[item_id*16+4]  = GfxConfig.meshes[mesh_id].vertices[4] * size.x + pos.x;
-                batch_vertices[item_id*16+5]  = GfxConfig.meshes[mesh_id].vertices[5] * size.y + pos.y;
-                batch_vertices[item_id*16+8]  = GfxConfig.meshes[mesh_id].vertices[8] * size.x + pos.x;
-                batch_vertices[item_id*16+9]  = GfxConfig.meshes[mesh_id].vertices[9] * size.y + pos.y;
-                batch_vertices[item_id*16+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x + pos.x;
-                batch_vertices[item_id*16+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y + pos.y;
+                verts[item_id*28]    = cast(short)((GfxConfig.meshes[mesh_id].vertices[0] * size.x + pos.x) * 8191);
+                verts[item_id*28+1]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[1] * size.y + pos.y) * 8191);
+                verts[item_id*28+2]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x)*32767);
+                verts[item_id*28+3]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y)*32767);
+                memcpy(verts.ptr+item_id*28+4,mem.ptr,6);
+
+
+                verts[item_id*28+7]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[4] * size.x + pos.x) * 8191);
+                verts[item_id*28+8]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[5] * size.y + pos.y) * 8191);
+                verts[item_id*28+9]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x)*32767);
+                verts[item_id*28+10]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y)*32767);
+                memcpy(verts.ptr+item_id*28+11,mem.ptr,6);
+
+
+                verts[item_id*28+14]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[8] * size.x + pos.x) * 8191);
+                verts[item_id*28+15]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[9] * size.y + pos.y) * 8191);
+                verts[item_id*28+16] = cast(short)((GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x)*32767);
+                verts[item_id*28+17] = cast(short)((GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y)*32767);
+                memcpy(verts.ptr+item_id*28+18,mem.ptr,6);
+
+
+                verts[item_id*28+21] = cast(short)((GfxConfig.meshes[mesh_id].vertices[12] * size.x + pos.x) * 8191);
+                verts[item_id*28+22] = cast(short)((GfxConfig.meshes[mesh_id].vertices[13] * size.y + pos.y) * 8191);
+                verts[item_id*28+23] = cast(short)((GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x)*32767);
+                verts[item_id*28+24] = cast(short)((GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y)*32767);
+                memcpy(verts.ptr+item_id*28+25,mem.ptr,6);
            }
            else
            {
@ -406,50 +524,72 @@ struct Renderer
                float sinn = sinf(angle);
                float coss = cosf(angle);

-                /*batch_vertices[item_id*16]    = GfxConfig.meshes[mesh_id].vertices[0]  * size.x;
-                batch_vertices[item_id*16+1]  = GfxConfig.meshes[mesh_id].vertices[1]  * size.y;
-                batch_vertices[item_id*16+4]  = GfxConfig.meshes[mesh_id].vertices[4]  * size.x;
-                batch_vertices[item_id*16+5]  = GfxConfig.meshes[mesh_id].vertices[5]  * size.y;
-                batch_vertices[item_id*16+8]  = GfxConfig.meshes[mesh_id].vertices[8]  * size.x;
-                batch_vertices[item_id*16+9]  = GfxConfig.meshes[mesh_id].vertices[9]  * size.y;
-                batch_vertices[item_id*16+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x;
-                batch_vertices[item_id*16+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y;*/
+                /*batch_vertices[item_id*28]    = GfxConfig.meshes[mesh_id].vertices[0]  * size.x;
+                batch_vertices[item_id*28+1]  = GfxConfig.meshes[mesh_id].vertices[1]  * size.y;
+                batch_vertices[item_id*28+4]  = GfxConfig.meshes[mesh_id].vertices[4]  * size.x;
+                batch_vertices[item_id*28+5]  = GfxConfig.meshes[mesh_id].vertices[5]  * size.y;
+                batch_vertices[item_id*28+8]  = GfxConfig.meshes[mesh_id].vertices[8]  * size.x;
+                batch_vertices[item_id*28+9]  = GfxConfig.meshes[mesh_id].vertices[9]  * size.y;
+                batch_vertices[item_id*28+12] = GfxConfig.meshes[mesh_id].vertices[12] * size.x;
+                batch_vertices[item_id*28+13] = GfxConfig.meshes[mesh_id].vertices[13] * size.y;*/

-                batch_vertices[item_id*16]    = (GfxConfig.meshes[mesh_id].vertices[0]  * coss + GfxConfig.meshes[mesh_id].vertices[1]  * sinn) * size.x + pos.x;
-                batch_vertices[item_id*16+1]  = (GfxConfig.meshes[mesh_id].vertices[1]  * coss - GfxConfig.meshes[mesh_id].vertices[0]  * sinn) * size.y + pos.y;
-                batch_vertices[item_id*16+4]  = (GfxConfig.meshes[mesh_id].vertices[4]  * coss + GfxConfig.meshes[mesh_id].vertices[5]  * sinn) * size.x + pos.x;
-                batch_vertices[item_id*16+5]  = (GfxConfig.meshes[mesh_id].vertices[5]  * coss - GfxConfig.meshes[mesh_id].vertices[4]  * sinn) * size.y + pos.y;
-                batch_vertices[item_id*16+8]  = (GfxConfig.meshes[mesh_id].vertices[8]  * coss + GfxConfig.meshes[mesh_id].vertices[9]  * sinn) * size.x + pos.x;
-                batch_vertices[item_id*16+9]  = (GfxConfig.meshes[mesh_id].vertices[9]  * coss - GfxConfig.meshes[mesh_id].vertices[8]  * sinn) * size.y + pos.y;
-                batch_vertices[item_id*16+12] = (GfxConfig.meshes[mesh_id].vertices[12] * coss + GfxConfig.meshes[mesh_id].vertices[13] * sinn) * size.x + pos.x;
-                batch_vertices[item_id*16+13] = (GfxConfig.meshes[mesh_id].vertices[13] * coss - GfxConfig.meshes[mesh_id].vertices[12] * sinn) * size.y + pos.y;
+                verts[item_id*28]    = cast(short)(((GfxConfig.meshes[mesh_id].vertices[0]  * coss + GfxConfig.meshes[mesh_id].vertices[1]  * sinn) * size.x + pos.x) * 8191);
+                verts[item_id*28+1]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[1]  * coss - GfxConfig.meshes[mesh_id].vertices[0]  * sinn) * size.y + pos.y) * 8191);
+                verts[item_id*28+7]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[4]  * coss + GfxConfig.meshes[mesh_id].vertices[5]  * sinn) * size.x + pos.x) * 8191);
+                verts[item_id*28+8]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[5]  * coss - GfxConfig.meshes[mesh_id].vertices[4]  * sinn) * size.y + pos.y) * 8191);
+                verts[item_id*28+14]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[8]  * coss + GfxConfig.meshes[mesh_id].vertices[9]  * sinn) * size.x + pos.x) * 8191);
+                verts[item_id*28+15]  = cast(short)(((GfxConfig.meshes[mesh_id].vertices[9]  * coss - GfxConfig.meshes[mesh_id].vertices[8]  * sinn) * size.y + pos.y) * 8191);
+                verts[item_id*28+21] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[12] * coss + GfxConfig.meshes[mesh_id].vertices[13] * sinn) * size.x + pos.x) * 8191);
+                verts[item_id*28+22] = cast(short)(((GfxConfig.meshes[mesh_id].vertices[13] * coss - GfxConfig.meshes[mesh_id].vertices[12] * sinn) * size.y + pos.y) * 8191);
            }

-            batch_vertices[item_id*16+2]  = GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x;
-            batch_vertices[item_id*16+3]  = GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y;
-            batch_vertices[item_id*16+6]  = GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x;
-            batch_vertices[item_id*16+7]  = GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y;
-            batch_vertices[item_id*16+10] = GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x;
-            batch_vertices[item_id*16+11] = GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y;
-            batch_vertices[item_id*16+14] = GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x;
-            batch_vertices[item_id*16+15] = GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y;
+            /*verts[item_id*28+2]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[2] * coords.z + coords.x)*32767);
+            verts[item_id*28+3]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[3] * coords.w + coords.y)*32767);
+            verts[item_id*28+9]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[6] * coords.z + coords.x)*32767);
+            verts[item_id*28+10]  = cast(short)((GfxConfig.meshes[mesh_id].vertices[7] * coords.w + coords.y)*32767);
+            verts[item_id*28+16] = cast(short)((GfxConfig.meshes[mesh_id].vertices[10] * coords.z + coords.x)*32767);
+            verts[item_id*28+17] = cast(short)((GfxConfig.meshes[mesh_id].vertices[11] * coords.w + coords.y)*32767);
+            verts[item_id*28+23] = cast(short)((GfxConfig.meshes[mesh_id].vertices[14] * coords.z + coords.x)*32767);
+            verts[item_id*28+24] = cast(short)((GfxConfig.meshes[mesh_id].vertices[15] * coords.w + coords.y)*32767);*/

-            uint ind_id = item_id % batch_size;
+            /*verts[item_id*28+4] = depth;
+            verts[item_id*28+11] = depth;
+            verts[item_id*28+18] = depth;
+            verts[item_id*28+25] = depth;

-            batch_indices[item_id*6]   = cast(ushort)(GfxConfig.meshes[mesh_id].indices[0] + ind_id*4);
-            batch_indices[item_id*6+1] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[1] + ind_id*4);
-            batch_indices[item_id*6+2] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[2] + ind_id*4);
-            batch_indices[item_id*6+3] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[3] + ind_id*4);
-            batch_indices[item_id*6+4] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[4] + ind_id*4);
-            batch_indices[item_id*6+5] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[5] + ind_id*4);
+            *cast(uint*)&verts[item_id*28+5] = color;
+            *cast(uint*)&verts[item_id*28+12] = color;
+            *cast(uint*)&verts[item_id*28+19] = color;
+            *cast(uint*)&verts[item_id*28+26] = color;
+            
+            memcpy(verts.ptr+item_id*28+4,mem.ptr,6);
+            memcpy(verts.ptr+item_id*28+11,mem.ptr,6);
+            memcpy(verts.ptr+item_id*28+18,mem.ptr,6);
+            memcpy(verts.ptr+item_id*28+25,mem.ptr,6);*/
+
+            uint ind_id = (item_id % batch_size)*4;
+
+            ushort[] indices = threads[thread_id].block.batch_indices;
+
+            indices[item_id*6]   = cast(ushort)(GfxConfig.meshes[mesh_id].indices[0] + ind_id);
+            indices[item_id*6+1] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[1] + ind_id);
+            indices[item_id*6+2] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[2] + ind_id);
+            indices[item_id*6+3] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[3] + ind_id);
+            indices[item_id*6+4] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[4] + ind_id);
+            indices[item_id*6+5] = cast(ushort)(GfxConfig.meshes[mesh_id].indices[5] + ind_id);

            //render_list[item_id] = RenderData(tex,material_id,mesh_id);
-            render_list[item_id].texture = tex;
-            render_list[item_id].material_id = material_id;
-            render_list[item_id].mesh_id = mesh_id;
+            //render_list[item_id].texture = tex;
+            //render_list[item_id].material_id = material_id;
+            //render_list[item_id].mesh_id = mesh_id;
            
            //data_index += 1;//data_offset;
-            item_id++;
+            threads[thread_id].block.items++;
+            if(threads[thread_id].block.items >= VertexBlock.max_items)
+            {
+                pushBlock(threads[thread_id].block);
+                threads[thread_id].block = getBlock();
+            }
        }
    }

@ -467,9 +607,22 @@ struct Renderer
    {
        glClearColor(0,0,0,0);
        glViewport(0,0,this_.resolution.x,this_.resolution.y);
-        glClear(GL_COLOR_BUFFER_BIT);// | GL_DEPTH_BUFFER_BIT);
-        glDisable(GL_DEPTH_TEST);
+        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+        //glDisable(GL_DEPTH_TEST);
+        glEnable(GL_DEPTH_TEST);
        glDisable(GL_CULL_FACE);
+        glDepthFunc(GL_LESS);
+
+        version(WebAssembly)
+        {
+            glDepthRangef(0,1);
+        }
+        else
+        {
+            glDepthRange(0,1);
+        }
+        //glDepthRange(0,1);
+        //glClearDepth(1);
    }

    void present()
@ -484,6 +637,10 @@ struct Renderer

    private static void __present_gl(ref Renderer this_)
    {
+
+        this_.pushThreadsBlocks();
+        this_.pushData();
+
        glViewport(0,0,this_.resolution.x,this_.resolution.y);
        //glEnable(GL_ALPHA_TEST);
        //glAlphaFunc(GL_GREATER, 0.01);
@ -505,14 +662,17 @@ struct Renderer
                    break;
                case Technique.vbo_batch:
                //if(data_index){
-                    batch_vbo[0].bufferSubData(Buffer.BindTarget.array,item_id*4*16,0,batch_vertices.ptr);
-                    batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,item_id*6*2,0,batch_indices.ptr);
+                    //batch_vbo[0].bufferSubData(Buffer.BindTarget.array,item_id*4*14,0,batch_vertices.ptr);
+                    //batch_ibo[0].bufferSubData(Buffer.BindTarget.element_array,item_id*6*2,0,batch_indices.ptr);

                    batch_vbo[0].bind(Buffer.BindTarget.array);
                    batch_ibo[0].bind(Buffer.BindTarget.element_array);

-                    glVertexAttribPointer(0,2,GL_FLOAT,false,16,null);
-                    glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)8);//}
+                    //glVertexAttribPointer(0,2,GL_SHORT,true,14,null);
+                    //glVertexAttribPointer(1,2,GL_SHORT,true,14,cast(void*)4);//}
+                    glEnableVertexAttribArray(2);
+                    glEnableVertexAttribArray(3);
+                    //glVertexAttribPointer(2,1,GL_SHORT,true,14,cast(void*)6);//}
                    break;
                case Technique.instanced_attrib_divisor:
                    ubos[0].bufferSubData(Buffer.BindTarget.uniform,data_index,0,uniform_block.ptr);
@ -609,8 +769,8 @@ struct Renderer
            //glBeginQuery(GL_TIME_ELAPSED, time_queries[0]);
            if(technique == Technique.vbo_batch)
            {
-                uint items = item_id/batch_size+1;
-                foreach(i; 0..items)
+                //uint items = item_id/batch_size+1;
+                foreach(i; 0..draw_list.length)
                {
                    if(material_id != render_list[i].material_id)
                    {
@ -625,16 +785,20 @@ struct Renderer
                        render_list[i].texture.bind();
                    }

-                    uint instance_count = batch_size;
+                    /*uint instance_count = batch_size;
                    if((i+1)*batch_size > item_id)
                    {
                        instance_count = item_id%batch_size;
-                    }
+                    }*/

-                    glVertexAttribPointer(0,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16));
-                    glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16+8));
+                    // glVertexAttribPointer(0,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16));
+                    // glVertexAttribPointer(1,2,GL_FLOAT,false,16,cast(void*)(i*batch_size*4*16+8));
+                    glVertexAttribPointer(0,2,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14));
+                    glVertexAttribPointer(1,2,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14+4));
+                    glVertexAttribPointer(2,1,GL_SHORT,true,14,cast(void*)(draw_list[i].start*4*14+8));
+                    glVertexAttribPointer(3,4,GL_UNSIGNED_BYTE,true,14,cast(void*)(draw_list[i].start*4*14+10));

-                    glDrawElements(GL_TRIANGLES,instance_count*6,GL_UNSIGNED_SHORT,cast(void*)(i*batch_size*6*2));
+                    glDrawElements(GL_TRIANGLES,draw_list[i].count*6,GL_UNSIGNED_SHORT,cast(void*)(draw_list[i].start*6*2));

                    //glDrawElementsBaseVertex(GL_TRIANGLES,instance_count*6,GL_UNSIGNED_SHORT,cast(void*)(i*16_384*6*2),i*16_384*4);
                }
@ -817,6 +981,9 @@ struct Renderer
        }
        glDisableVertexAttribArray(0);
        glDisableVertexAttribArray(1);
+        glDisableVertexAttribArray(2);
+        glDisableVertexAttribArray(3);
+        this_.freeBlocks();
        /*glUseProgram(0);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);*/