Index: branches/SHM-dev/source/Lib/TLibEncoder/TEncSearch.cpp
===================================================================
--- branches/SHM-dev/source/Lib/TLibEncoder/TEncSearch.cpp	(revision 1403)
+++ branches/SHM-dev/source/Lib/TLibEncoder/TEncSearch.cpp	(revision 1404)
@@ -346,52 +346,56 @@
     Distortion uiBitCost = m_pcRdCost->getCost( iSearchX, iSearchY );
 
-    if ( m_cDistParam.iRows > 32 )
-    {
-      m_cDistParam.iSubShift = 4;
-    }
-    else if ( m_cDistParam.iRows > 16 )
-    {
-      m_cDistParam.iSubShift = 3;
-    }
-    else if ( m_cDistParam.iRows > 8 )
-    {
-      m_cDistParam.iSubShift = 2;
-    }
-    else
-    {
-      m_cDistParam.iSubShift = 1;
-    }
-
-    Distortion uiTempSad = m_cDistParam.DistFunc( &m_cDistParam );
-    if((uiTempSad + uiBitCost) < rcStruct.uiBestSad)
-    {
-      uiSad += uiTempSad >>  m_cDistParam.iSubShift;
-      while(m_cDistParam.iSubShift > 0)
-      {
-        isubShift         = m_cDistParam.iSubShift -1;
-        m_cDistParam.pOrg = pcPatternKey->getROIY() + (pcPatternKey->getPatternLStride() << isubShift);
-        m_cDistParam.pCur = piRefSrch + (rcStruct.iYStride << isubShift);
-        uiTempSad = m_cDistParam.DistFunc( &m_cDistParam );
+    // Skip the SAD computation if the bit cost alone is already >= the best SAD
+    if (uiBitCost < rcStruct.uiBestSad)
+    {
+      if ( m_cDistParam.iRows > 32 )
+      {
+        m_cDistParam.iSubShift = 4;
+      }
+      else if ( m_cDistParam.iRows > 16 )
+      {
+        m_cDistParam.iSubShift = 3;
+      }
+      else if ( m_cDistParam.iRows > 8 )
+      {
+        m_cDistParam.iSubShift = 2;
+      }
+      else
+      {
+        m_cDistParam.iSubShift = 1;
+      }
+
+      Distortion uiTempSad = m_cDistParam.DistFunc( &m_cDistParam );
+      if((uiTempSad + uiBitCost) < rcStruct.uiBestSad)
+      {
         uiSad += uiTempSad >>  m_cDistParam.iSubShift;
-        if(((uiSad << isubShift) + uiBitCost) > rcStruct.uiBestSad)
-        {
-          break;
-        }
-
-        m_cDistParam.iSubShift--;
-      }
-
-      if(m_cDistParam.iSubShift == 0)
-      {
-        uiSad += uiBitCost;
-        if( uiSad < rcStruct.uiBestSad )
-        {
-          rcStruct.uiBestSad      = uiSad;
-          rcStruct.iBestX         = iSearchX;
-          rcStruct.iBestY         = iSearchY;
-          rcStruct.uiBestDistance = uiDistance;
-          rcStruct.uiBestRound    = 0;
-          rcStruct.ucPointNr      = ucPointNr;
-          m_cDistParam.m_maximumDistortionForEarlyExit = uiSad;
+        while(m_cDistParam.iSubShift > 0)
+        {
+          isubShift         = m_cDistParam.iSubShift -1;
+          m_cDistParam.pOrg = pcPatternKey->getROIY() + (pcPatternKey->getPatternLStride() << isubShift);
+          m_cDistParam.pCur = piRefSrch + (rcStruct.iYStride << isubShift);
+          uiTempSad = m_cDistParam.DistFunc( &m_cDistParam );
+          uiSad += uiTempSad >>  m_cDistParam.iSubShift;
+          if(((uiSad << isubShift) + uiBitCost) > rcStruct.uiBestSad)
+          {
+            break;
+          }
+
+          m_cDistParam.iSubShift--;
+        }
+
+        if(m_cDistParam.iSubShift == 0)
+        {
+          uiSad += uiBitCost;
+          if( uiSad < rcStruct.uiBestSad )
+          {
+            rcStruct.uiBestSad      = uiSad;
+            rcStruct.iBestX         = iSearchX;
+            rcStruct.iBestY         = iSearchY;
+            rcStruct.uiBestDistance = uiDistance;
+            rcStruct.uiBestRound    = 0;
+            rcStruct.ucPointNr      = ucPointNr;
+            m_cDistParam.m_maximumDistortionForEarlyExit = uiSad;
+          }
         }
       }
@@ -411,16 +415,22 @@
     uiSad = m_cDistParam.DistFunc( &m_cDistParam );
 
-    // motion cost
-    uiSad += m_pcRdCost->getCost( iSearchX, iSearchY );
-
+    // only add motion cost if uiSad is smaller than best. Otherwise pointless
+    // to add motion cost.
     if( uiSad < rcStruct.uiBestSad )
     {
-      rcStruct.uiBestSad      = uiSad;
-      rcStruct.iBestX         = iSearchX;
-      rcStruct.iBestY         = iSearchY;
-      rcStruct.uiBestDistance = uiDistance;
-      rcStruct.uiBestRound    = 0;
-      rcStruct.ucPointNr      = ucPointNr;
-      m_cDistParam.m_maximumDistortionForEarlyExit = uiSad;
+
+      // motion cost
+      uiSad += m_pcRdCost->getCost( iSearchX, iSearchY );
+
+      if( uiSad < rcStruct.uiBestSad )
+      {
+        rcStruct.uiBestSad      = uiSad;
+        rcStruct.iBestX         = iSearchX;
+        rcStruct.iBestY         = iSearchY;
+        rcStruct.uiBestDistance = uiDistance;
+        rcStruct.uiBestRound    = 0;
+        rcStruct.ucPointNr      = ucPointNr;
+        m_cDistParam.m_maximumDistortionForEarlyExit = uiSad;
+      }
     }
   }
@@ -3185,5 +3195,5 @@
 #endif
 
-    //  Bi-directional prediction
+    //  Bi-predictive Motion estimation
 #if REF_IDX_ME_ZEROMV
     if ( (pcCU->getSlice()->isInterB()) && (pcCU->isBipredRestriction(iPartIdx) == false) && !(pcCU->getSlice()->getMvdL1ZeroFlag() && bestBiPDist == MAX_INT) )
@@ -3825,6 +3835,4 @@
 
 
-
-
 Void TEncSearch::xMotionEstimation( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPartIdx, RefPicList eRefPicList, TComMv* pcMvPred, Int iRefIdxPred, TComMv& rcMv, UInt& ruiBits, Distortion& ruiCost, Bool bBi  )
 {
@@ -3850,5 +3858,5 @@
   pcCU->getPartIndexAndSize( iPartIdx, uiPartAddr, iRoiWidth, iRoiHeight );
 
-  if ( bBi )
+  if ( bBi ) // Bipredictive ME
   {
     TComYuv*  pcYuvOther = &m_acYuvPred[1-(Int)eRefPicList];
@@ -3952,6 +3960,4 @@
 
 
-
-
 Void TEncSearch::xSetSearchRange ( const TComDataCU* const pcCU, const TComMv& cMvPred, const Int iSrchRng,
                                    TComMv& rcMvSrchRngLT, TComMv& rcMvSrchRngRB )
@@ -3977,4 +3983,5 @@
 #endif
 }
+
 
 Void TEncSearch::xPatternSearch( const TComPattern* const pcPatternKey,
@@ -4040,5 +4047,4 @@
   return;
 }
-
 
 
@@ -4073,6 +4079,4 @@
   }
 }
-
-
 
 
@@ -4132,5 +4136,9 @@
       cMv >>= 2;
 #endif
-      xTZSearchHelp( pcPatternKey, cStruct, cMv.getHor(), cMv.getVer(), 0, 0 );
+      if (cMv != rcMv && (cMv.getHor() != cStruct.iBestX || cMv.getVer() != cStruct.iBestY))
+      {
+        // only test cMv if it differs from rcMv and from the current best (iBestX, iBestY) in at least one component.
+        xTZSearchHelp( pcPatternKey, cStruct, cMv.getHor(), cMv.getVer(), 0, 0 );
+      }
     }
   }
@@ -4139,5 +4147,10 @@
   if ( bTestZeroVector )
   {
-    xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 );
+    if ((rcMv.getHor() != 0 || rcMv.getVer() != 0) &&
+        (0 != cStruct.iBestX || 0 != cStruct.iBestY))
+    {
+      // only test 0-vector if not obviously previously tested.
+      xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 );
+    }
   }
 
@@ -4157,5 +4170,10 @@
     integerMv2Nx2NPred >>= 2;
 #endif
-    xTZSearchHelp(pcPatternKey, cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
+    if ((rcMv != integerMv2Nx2NPred) &&
+        (integerMv2Nx2NPred.getHor() != cStruct.iBestX || integerMv2Nx2NPred.getVer() != cStruct.iBestY))
+    {
+      // only test integerMv2Nx2NPred if not obviously previously tested.
+      xTZSearchHelp(pcPatternKey, cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
+    }
 
     // reset search range
@@ -4265,5 +4283,5 @@
   }
 
-  // start refinement
+  // star refinement
   if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
   {
