Index: branches/SHM-dev/source/Lib/TLibEncoder/TEncGOP.cpp
===================================================================
--- branches/SHM-dev/source/Lib/TLibEncoder/TEncGOP.cpp	(revision 1317)
+++ branches/SHM-dev/source/Lib/TLibEncoder/TEncGOP.cpp	(revision 1318)
@@ -2406,4 +2406,5 @@
     pcPic->getSlice(pcSlice->getSliceIdx())->setMvdL1ZeroFlag(pcSlice->getMvdL1ZeroFlag());
 
+
     Double lambda            = 0.0;
     Int actualHeadBits       = 0;
@@ -2411,5 +2412,5 @@
     Int estimatedBits        = 0;
     Int tmpBitsBeforeWriting = 0;
-    if ( m_pcCfg->getUseRateCtrl() )
+    if ( m_pcCfg->getUseRateCtrl() ) // TODO: does this work with multiple slices and slice-segments?
     {
       Int frameLevel = m_pcRateCtrl->getRCSeq()->getGOPID2Level( iGOPid );
@@ -2438,5 +2439,5 @@
       else if ( frameLevel == 0 )   // intra case, but use the model
       {
-        m_pcSliceEncoder->calCostSliceI(pcPic);
+        m_pcSliceEncoder->calCostSliceI(pcPic); // TODO: This only analyses the first slice segment - what about the others?
 
         if ( m_pcCfg->getIntraPeriod() != 1 )   // do not refine allocated bits for all intra case
@@ -2505,5 +2506,5 @@
       {
         m_pcSliceEncoder->precompressSlice( pcPic );
-        m_pcSliceEncoder->compressSlice   ( pcPic );
+        m_pcSliceEncoder->compressSlice   ( pcPic, false );
 
         const UInt curSliceSegmentEnd = pcSlice->getSliceSegmentCurEndCtuTsAddr();
Index: branches/SHM-dev/source/Lib/TLibEncoder/TEncSlice.cpp
===================================================================
--- branches/SHM-dev/source/Lib/TLibEncoder/TEncSlice.cpp	(revision 1317)
+++ branches/SHM-dev/source/Lib/TLibEncoder/TEncSlice.cpp	(revision 1318)
@@ -639,7 +639,24 @@
     printf( "\nMultiple QP optimization is not allowed when rate control is enabled." );
     assert(0);
+    return;
   }
 
   TComSlice* pcSlice        = pcPic->getSlice(getSliceIdx());
+
+  if (pcSlice->getDependentSliceSegmentFlag())
+  {
+    // A dependent slice segment has no QP of its own: the slice-level QP was already
+    // optimised when the entire slice was analysed, so there is nothing left to do here.
+    return;
+  }
+
+  if (pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES)
+  {
+    // TODO: investigate use of average cost per CTU so that this Slice Mode can be used.
+    printf( "\nUnable to optimise Slice-level QP if Slice Mode is set to FIXED_NUMBER_OF_BYTES\n" );
+    assert(0);
+    return;
+  }
+
   Double     dPicRdCostBest = MAX_DOUBLE;
   UInt       uiQpIdxBest = 0;
@@ -667,5 +684,4 @@
   m_pcRdCost      ->setFrameLambda(dFrameLambda);
 
-  const UInt initialSliceQp=pcSlice->getSliceQp();
   // for each QP candidate
   for ( UInt uiQpIdx = 0; uiQpIdx < 2 * m_pcCfg->getDeltaQpRD() + 1; uiQpIdx++ )
@@ -678,15 +694,16 @@
 
     // try compress
-    compressSlice   ( pcPic );
-
-    Double dPicRdCost;
-    UInt64 uiPicDist        = m_uiPicDist;
-    // TODO: will this work if multiple slices are being used? There may not be any reconstruction data yet.
-    //       Will this also be ideal if a byte-restriction is placed on the slice?
-    //         - what if the last CTU was sometimes included, sometimes not, and that had all the distortion?
-    m_pcGOPEncoder->preLoopFilterPicAll( pcPic, uiPicDist );
+    compressSlice   ( pcPic, true );
+
+    UInt64 uiPicDist        = m_uiPicDist; // Distortion, as calculated by compressSlice.
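+    // NOTE: This distortion is the chroma-weighted SSE distortion for this slice only.
+    //       Previously a standard SSE distortion was calculated for the entire frame (presumably by preLoopFilterPicAll, now disabled below).
+    //       It is still unresolved which of the two measures should drive the slice-level QP decision.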
+
+    // TODO: Update loop filter, SAO and distortion calculation to work on one slice only.
+    // m_pcGOPEncoder->preLoopFilterPicAll( pcPic, uiPicDist );
 
     // compute RD cost and choose the best
-    dPicRdCost = m_pcRdCost->calcRdCost64( m_uiPicTotalBits, uiPicDist, true, DF_SSE_FRAME);
+    Double dPicRdCost = m_pcRdCost->calcRdCost64( m_uiPicTotalBits, uiPicDist, true, DF_SSE_FRAME); // NOTE: Is the 'true' parameter really necessary?
 
     if ( dPicRdCost < dPicRdCostBest )
@@ -697,10 +714,4 @@
   }
 
-  if (pcSlice->getDependentSliceSegmentFlag() && initialSliceQp!=m_piRdPicQp[uiQpIdxBest] )
-  {
-    // TODO: this won't work with dependent slices: they do not have their own QP.
-    fprintf(stderr,"ERROR - attempt to change QP for a dependent slice-segment, having already coded the slice\n");
-    assert(pcSlice->getDependentSliceSegmentFlag()==false || initialSliceQp==m_piRdPicQp[uiQpIdxBest]);
-  }
   // set best values
   pcSlice       ->setSliceQp             ( m_piRdPicQp    [uiQpIdxBest] );
@@ -711,5 +722,5 @@
 }
 
-Void TEncSlice::calCostSliceI(TComPic* pcPic)
+Void TEncSlice::calCostSliceI(TComPic* pcPic) // TODO: this only analyses the first slice segment. What about the others?
 {
   Double            iSumHadSlice      = 0;
@@ -755,6 +766,9 @@
 /** \param pcPic   picture class
  */
-Void TEncSlice::compressSlice( TComPic* pcPic )
-{
+Void TEncSlice::compressSlice( TComPic* pcPic, const Bool bCompressEntireSlice )
+{
+  // If bCompressEntireSlice is true, the entire slice (not just the current slice segment) is compressed:
+  //   the segment end is moved to the slice end and the FIXED_NUMBER_OF_BYTES segment limit is not applied.
+
   UInt   startCtuTsAddr;
   UInt   boundingCtuTsAddr;
@@ -762,4 +776,9 @@
   pcSlice->setSliceSegmentBits(0);
   xDetermineStartAndBoundingCtuTsAddr ( startCtuTsAddr, boundingCtuTsAddr, pcPic );
+  if (bCompressEntireSlice)
+  {
+    boundingCtuTsAddr = pcSlice->getSliceCurEndCtuTsAddr();
+    pcSlice->setSliceSegmentCurEndCtuTsAddr(boundingCtuTsAddr);
+  }
 
   // initialize cost values - these are used by precompressSlice (they should be parameters).
@@ -817,5 +836,5 @@
   {
     // TODO: this won't work with dependent slices: they do not have their own QP. Check fix to mask clause execution with && !(pcSlice->getDependentSliceSegmentFlag())
-    m_pcTrQuant->clearSliceARLCnt();
+    m_pcTrQuant->clearSliceARLCnt(); // TODO: this looks wrong for multiple slices - the results of all but the last slice will be cleared before they are used (all slices compressed, and then all slices encoded)
     if(pcSlice->getSliceType()!=I_SLICE)
     {
@@ -970,5 +989,5 @@
       boundingCtuTsAddr=validEndOfSliceCtuTsAddr;
     }
-    else if(pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceSegmentBits()+numberOfWrittenBits > (pcSlice->getSliceSegmentArgument()<<3))
+    else if((!bCompressEntireSlice) && pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceSegmentBits()+numberOfWrittenBits > (pcSlice->getSliceSegmentArgument()<<3))
     {
       pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
@@ -1225,5 +1244,5 @@
   if( m_pcCfg->getUseAdaptQpSelect() )
   {
-    m_pcTrQuant->storeSliceQpNext(pcSlice);
+    m_pcTrQuant->storeSliceQpNext(pcSlice); // TODO: this will only be storing the adaptive QP state of the very last slice-segment that is not dependent in the frame... Perhaps this should be moved to the compress slice loop.
   }
 #endif
Index: branches/SHM-dev/source/Lib/TLibEncoder/TEncSlice.h
===================================================================
--- branches/SHM-dev/source/Lib/TLibEncoder/TEncSlice.h	(revision 1317)
+++ branches/SHM-dev/source/Lib/TLibEncoder/TEncSlice.h	(revision 1318)
@@ -124,6 +124,6 @@
   Void    resetQP             ( TComPic* pic, Int sliceQP, Double lambda );
   // compress and encode slice
-  Void    precompressSlice    ( TComPic* pcPic                                     );      ///< precompress slice for multi-loop opt.
-  Void    compressSlice       ( TComPic* pcPic                                     );      ///< analysis stage of slice
+  Void    precompressSlice    ( TComPic* pcPic                                     );      ///< precompress slice for multi-loop slice-level QP opt.
+  Void    compressSlice       ( TComPic* pcPic, const Bool bCompressEntireSlice    );      ///< analysis stage of slice
   Void    calCostSliceI       ( TComPic* pcPic );
   Void    encodeSlice         ( TComPic* pcPic, TComOutputBitstream* pcSubstreams, UInt &numBinsCoded );
