diff --git a/librtt/Renderer/Rtt_Program.h b/librtt/Renderer/Rtt_Program.h
index eaa9e28b5..bbb44089a 100644
--- a/librtt/Renderer/Rtt_Program.h
+++ b/librtt/Renderer/Rtt_Program.h
@@ -76,7 +76,7 @@ class Program : public CPUResource
 		const char *GetHeaderSource() const { return fHeaderSource; }
 		void SetHeaderSource( const char *source );
 
-		ShaderResource *GetShaderResource() { return fResource; }
+		ShaderResource *GetShaderResource() const { return fResource; }
 		void SetShaderResource( ShaderResource *resource ) { fResource = resource; }
 #if defined( Rtt_USE_PRECOMPILED_SHADERS )
 		ShaderBinaryVersions* GetCompiledShaders() const { return fCompiledShaders; }
diff --git a/librtt/Renderer/Rtt_Renderer.cpp b/librtt/Renderer/Rtt_Renderer.cpp
index 24eeaa706..4a0b0edba 100644
--- a/librtt/Renderer/Rtt_Renderer.cpp
+++ b/librtt/Renderer/Rtt_Renderer.cpp
@@ -27,6 +27,8 @@
 #include "Core/Rtt_Types.h"
 #include "Renderer/Rtt_MCPUResourceObserver.h"
 
+#include "Display/Rtt_ShaderResource.h"
+
 #include "Rtt_Profiling.h"
 
 #define ENABLE_DEBUG_PRINT 0
@@ -452,6 +454,24 @@ Renderer::PopMaskCount()
 	--fMaskCountIndex;
 }
 
+// Returns true when `oldProgram` and `newProgram` share the same ShaderResource
+// and `oldProgram` is the program that resource returns for ShaderResource::k25D,
+// i.e. switching to `newProgram` would only drop the k25D mod.
+// Returns false when there is no previous program or no shader resource.
+static bool
+OnlyTryingToDowngradeMod( const Program* newProgram, const Program* oldProgram )
+{
+	const ShaderResource* resource = newProgram->GetShaderResource();
+
+	// Guard both pointers before dereferencing: SetShaderResource() stores whatever it is given, NULL included.
+	if( NULL == oldProgram || NULL == resource )
+	{
+		return false;
+	}
+
+	return resource == oldProgram->GetShaderResource() && resource->GetProgramMod( ShaderResource::k25D ) == oldProgram;
+}
+
 void
 Renderer::Insert( const RenderData* data )
 {
@@ -465,13 +485,21 @@ Renderer::Insert( const RenderData* data )
 	Rtt_ASSERT( fBackCommandBuffer != NULL );
 	Rtt_ASSERT( fFrontCommandBuffer != NULL );
 
+	// If using deformed rects, avoid some batch-breaking when only other objects with the same effect are drawn, by not
+	// falling back to non-deformed variant. The deformed variant is a superset, with some additional operations. (TODO:
+	// In theory rendering many "normal" objects in a row will outweigh the switching cost. One idea here is to emit the
+	// geometry as though it were batch-friendly, since it might be, but perform speculative bind texture and draw commands.
+	// If we do indeed cross the critical point, keep the commands; otherwise convert them to no-ops.)
+	bool programsDifferWeakly = data->fProgram != fPrevious.fProgram;
+	bool programsDifferStrongly = programsDifferWeakly && !OnlyTryingToDowngradeMod( data->fProgram, fPrevious.fProgram );
+
 	bool blendDirty = data->fBlendMode != fPrevious.fBlendMode;
 	bool blendEquationDirty = data->fBlendEquation != fPrevious.fBlendEquation;
 	bool fillDirty0 = data->fFillTexture0 != fPrevious.fFillTexture0 && data->fFillTexture0;
 	bool fillDirty1 = data->fFillTexture1 != fPrevious.fFillTexture1 && data->fFillTexture1;
 	bool maskTextureDirty = data->fMaskTexture != fPrevious.fMaskTexture; // since PushMask() can stomp on the previous texture, a "not NULL" check here is unreliable
 	bool maskUniformDirty = data->fMaskUniform != fPrevious.fMaskUniform; // ...ditto
-	bool programDirty = data->fProgram != fPrevious.fProgram || MaskCount() != fCurrentProgramMaskCount;
+	bool programDirty = programsDifferStrongly || MaskCount() != fCurrentProgramMaskCount;
 	bool userUniformDirty0 = data->fUserUniform0 != fPrevious.fUserUniform0 && data->fUserUniform0;
 	bool userUniformDirty1 = data->fUserUniform1 != fPrevious.fUserUniform1 && data->fUserUniform1;
 	bool userUniformDirty2 = data->fUserUniform2 != fPrevious.fUserUniform2 && data->fUserUniform2;
@@ -526,6 +554,8 @@ Renderer::Insert( const RenderData* data )
 	if( primitiveType != fPreviousPrimitiveType || primitiveType != Geometry::kTriangleStrip )
 	{
 		batch = false;
+
+		programsDifferStrongly = programsDifferWeakly; // non-strips do not always populate 'q' member of vertex
 	}
 
 	// If the previous RenderData had its Geometry stored on the GPU,
@@ -688,7 +718,7 @@ Renderer::Insert( const RenderData* data )
 
 	// NOTE: The mask count is incremented just in time to select the correct program version, so we re-compare
 	// instead of using programDirty which does the equivalent calculation for batching purposes.
-	if( data->fProgram != fPrevious.fProgram || MaskCount() != fCurrentProgramMaskCount )
+	if( programsDifferStrongly || MaskCount() != fCurrentProgramMaskCount )
 	{
 		if( !data->fProgram->fGPUResource )
 		{