
author
Monty Brandenberg <monty@lindenlab.com>
date
Thu, 15 Aug 2013 19:00:43 -0400
changeset 40706
fc62f596cc97
parent 40705
4daab1adb0c3
child 40707
6173865727da

SH-4410 Internal Documentation. Update and correct the mutex/data
lists to reflect the current state. Describe the functional flow
for a single LOD request. Put together a to-do list for follow-on
work. Knock the low/high water limits for GetMesh down a bit:
100/200 was too high; 75/150 should behave better with respect to
pathological failures.

indra/newview/llmeshrepository.cpp
     1.1 --- a/indra/newview/llmeshrepository.cpp	Wed Aug 07 21:31:41 2013 -0400
     1.2 +++ b/indra/newview/llmeshrepository.cpp	Thu Aug 15 19:00:43 2013 -0400
     1.3 @@ -79,10 +79,6 @@
     1.4  #include <queue>
     1.5  
     1.6  
     1.7 -// [ Disclaimer:  this documentation isn't by one of the original authors
     1.8 -//   but by someone coming through later and extracting intent and function.
     1.9 -//   Some of this will be wrong so use judgement. ]
    1.10 -//
    1.11  // Purpose
    1.12  //
    1.13  //   The purpose of this module is to provide access between the viewer
    1.14 @@ -101,6 +97,7 @@
    1.15  //     * getMeshHeader (For structural details, see:
    1.16  //       http://wiki.secondlife.com/wiki/Mesh/Mesh_Asset_Format)
    1.17  //     * notifyLoadedMeshes
    1.18 +//     * getSkinInfo
    1.19  //
    1.20  // Threads
    1.21  //
    1.22 @@ -108,7 +105,54 @@
    1.23  //   repo     Overseeing worker thread associated with the LLMeshRepoThread class
    1.24  //   decom    Worker thread for mesh decomposition requests
    1.25  //   core     HTTP worker thread:  does the work but doesn't intrude here
    1.26 -//   uploadN  0-N temporary mesh upload threads
    1.27 +//   uploadN  0-N temporary mesh upload threads (0-1 in practice)
    1.28 +//
    1.29 +// Sequence of Operations
    1.30 +//
    1.31 +//   What follows is a description of the retrieval of one LOD for
    1.32 +//   a new mesh object.  Work is performed by a series of short, quick
     1.33 +//   actions distributed over a number of threads.  Each action is
     1.34 +//   meant to proceed without stalling, and together they form a deep
     1.35 +//   request pipeline for throughput.  An ellipsis indicates a return
     1.36 +//   or break in processing that is resumed elsewhere.
    1.37 +//
    1.38 +//         main thread         repo thread (run() method)
    1.39 +//
    1.40 +//         loadMesh() invoked to request LOD
    1.41 +//           append LODRequest to mPendingRequests
    1.42 +//         ...
    1.43 +//         other mesh requests may be made
    1.44 +//         ...
    1.45 +//         notifyLoadedMeshes() invoked to stage work
    1.46 +//           append HeaderRequest to mHeaderReqQ
    1.47 +//         ...
    1.48 +//                             scan mHeaderReqQ
    1.49 +//                             issue 4096-byte GET for header
    1.50 +//                             ...
    1.51 +//                             onCompleted() invoked for GET
    1.52 +//                               data copied
    1.53 +//                               headerReceived() invoked
    1.54 +//                                 LLSD parsed
    1.55 +//                                 mMeshHeader, mMeshHeaderSize updated
    1.56 +//                                 scan mPendingLOD for LOD request
    1.57 +//                                 push LODRequest to mLODReqQ
    1.58 +//                             ...
    1.59 +//                             scan mLODReqQ
    1.60 +//                             fetchMeshLOD() invoked
    1.61 +//                               issue Byte-Range GET for LOD
    1.62 +//                             ...
    1.63 +//                             onCompleted() invoked for GET
    1.64 +//                               data copied
    1.65 +//                               lodReceived() invoked
    1.66 +//                                 unpack data into LLVolume
    1.67 +//                                 append LoadedMesh to mLoadedQ
    1.68 +//                             ...
    1.69 +//         notifyLoadedMeshes() invoked again
    1.70 +//           scan mLoadedQ
    1.71 +//           notifyMeshLoaded() for LOD
    1.72 +//             setMeshAssetLoaded() invoked for system volume
    1.73 +//             notifyMeshLoaded() invoked for each interested object
    1.74 +//         ...
    1.75  //
    1.76  // Mutexes
    1.77  //
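
The handoff pattern the diagram describes (main thread stages requests under a
briefly-held lock, repo thread drains them) can be sketched independently of the
viewer types.  RequestQueue and the LODRequest fields below are hypothetical
stand-ins; only the two-thread shape corresponds to the members named above
(mPendingRequests, mHeaderReqQ, mLODReqQ, mLoadedQ):

    #include <chrono>
    #include <iostream>
    #include <mutex>
    #include <queue>
    #include <thread>

    // Hypothetical stand-ins for the viewer's request types.
    struct LODRequest { int mesh_id; int lod; };

    class RequestQueue
    {
        std::mutex mMutex;                  // plays the role of LLMeshRepoThread::mMutex
        std::queue<LODRequest> mQueue;
    public:
        void push(const LODRequest& req)    // main thread: stage work quickly
        {
            std::lock_guard<std::mutex> lock(mMutex);
            mQueue.push(req);
        }
        bool pop(LODRequest& out)           // repo thread: drain one request
        {
            std::lock_guard<std::mutex> lock(mMutex);
            if (mQueue.empty()) return false;
            out = mQueue.front();
            mQueue.pop();
            return true;
        }
    };

    int main()
    {
        RequestQueue pending;
        std::thread repo([&pending] {       // stands in for LLMeshRepoThread::run()
            for (int processed = 0; processed < 3; )
            {
                LODRequest req;
                if (pending.pop(req))
                {
                    std::cout << "fetch mesh " << req.mesh_id << " LOD " << req.lod << "\n";
                    ++processed;
                }
                else
                {
                    std::this_thread::sleep_for(std::chrono::milliseconds(1));
                }
            }
        });
        for (int i = 0; i < 3; ++i)         // stands in for loadMesh() staging
        {
            pending.push(LODRequest{i, 2});
        }
        repo.join();
        return 0;
    }
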
    1.78 @@ -163,19 +207,19 @@
    1.79  //
    1.80  //   LLMeshRepository:
    1.81  //
    1.82 -//     sBytesReceived
    1.83 -//     sMeshRequestCount
    1.84 -//     sHTTPRequestCount
    1.85 -//     sHTTPLargeRequestCount
    1.86 -//     sHTTPRetryCount
    1.87 -//     sHTTPErrorCount
    1.88 -//     sLODPending
    1.89 -//     sLODProcessing
    1.90 -//     sCacheBytesRead
    1.91 -//     sCacheBytesWritten
    1.92 -//     sCacheReads
    1.93 -//     sCacheWrites
    1.94 -//     mLoadingMeshes                  none            rw.main.none, rw.main.mMeshMutex [4]
    1.95 +//     sBytesReceived                  none            rw.repo.none, ro.main.none [1]
    1.96 +//     sMeshRequestCount               "
    1.97 +//     sHTTPRequestCount               "
    1.98 +//     sHTTPLargeRequestCount          "
    1.99 +//     sHTTPRetryCount                 "
   1.100 +//     sHTTPErrorCount                 "
   1.101 +//     sLODPending                     mMeshMutex [4]  rw.main.mMeshMutex
   1.102 +//     sLODProcessing                  Repo::mMutex    rw.any.Repo::mMutex
   1.103 +//     sCacheBytesRead                 none            rw.repo.none, ro.main.none [1]
   1.104 +//     sCacheBytesWritten              "
   1.105 +//     sCacheReads                     "
   1.106 +//     sCacheWrites                    "
   1.107 +//     mLoadingMeshes                  mMeshMutex [4]  rw.main.none, rw.any.mMeshMutex
   1.108  //     mSkinMap                        none            rw.main.none
   1.109  //     mDecompositionMap               none            rw.main.none
   1.110  //     mPendingRequests                mMeshMutex [4]  rw.main.mMeshMutex
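
A note on the "rw.repo.none, ro.main.none [1]" annotation on the stat counters:
one thread writes and another reads with no lock at all, which is tolerated only
because the values are display-only statistics.  A strictly conforming way to
express the same intent is relaxed atomics; the sketch below is illustrative,
not the viewer's actual representation (the real members are plain U32):

    #include <atomic>
    #include <cstdint>

    // Illustrative stat counters: written by the worker ("repo") thread and
    // read by the main thread for display.  memory_order_relaxed provides
    // the unsynchronized-but-untorn behavior the annotation tolerates.
    struct MeshStats
    {
        std::atomic<uint64_t> bytes_received{0};
        std::atomic<uint32_t> http_request_count{0};

        void onBytesReceived(uint64_t n)        // repo thread
        {
            bytes_received.fetch_add(n, std::memory_order_relaxed);
        }
        uint64_t bytesForDisplay() const        // main thread; may be stale
        {
            return bytes_received.load(std::memory_order_relaxed);
        }
    };
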
   1.111 @@ -199,25 +243,18 @@
   1.112  //     sMaxConcurrentRequests   mMutex        wo.main.none, ro.repo.none, ro.main.mMutex
   1.113  //     mMeshHeader              mHeaderMutex  rw.repo.mHeaderMutex, ro.main.mHeaderMutex, ro.main.none [0]
   1.114  //     mMeshHeaderSize          mHeaderMutex  rw.repo.mHeaderMutex
   1.115 -//     mSkinRequests            none          rw.repo.none, rw.main.none [0]
   1.116 -//     mSkinInfoQ               none          rw.repo.none, rw.main.none [0]
   1.117 -//     mDecompositionRequests   none          rw.repo.none, rw.main.none [0]
   1.118 -//     mPhysicsShapeRequests    none          rw.repo.none, rw.main.none [0]
   1.119 -//     mDecompositionQ          none          rw.repo.none, rw.main.none [0]
   1.120 -//     mHeaderReqQ              mMutex        ro.repo.none [3], rw.repo.mMutex, rw.any.mMutex
   1.121 -//     mLODReqQ                 mMutex        ro.repo.none [3], rw.repo.mMutex, rw.any.mMutex
   1.122 -//     mUnavailableQ            mMutex        rw.repo.none [0], ro.main.none [3], rw.main.mMutex
   1.123 -//     mLoadedQ                 mMutex        rw.repo.mMutex, ro.main.none [3], rw.main.mMutex
   1.124 +//     mSkinRequests            mMutex        rw.repo.mMutex, ro.repo.none [5]
   1.125 +//     mSkinInfoQ               none          rw.repo.none, rw.main.mMutex [0]
   1.126 +//     mDecompositionRequests   mMutex        rw.repo.mMutex, ro.repo.none [5]
   1.127 +//     mPhysicsShapeRequests    mMutex        rw.repo.mMutex, ro.repo.none [5]
   1.128 +//     mDecompositionQ          none          rw.repo.none, rw.main.mMutex [0]
   1.129 +//     mHeaderReqQ              mMutex        ro.repo.none [5], rw.repo.mMutex, rw.any.mMutex
   1.130 +//     mLODReqQ                 mMutex        ro.repo.none [5], rw.repo.mMutex, rw.any.mMutex
   1.131 +//     mUnavailableQ            mMutex        rw.repo.none [0], ro.main.none [5], rw.main.mMutex
   1.132 +//     mLoadedQ                 mMutex        rw.repo.mMutex, ro.main.none [5], rw.main.mMutex
   1.133  //     mPendingLOD              mMutex        rw.repo.mMutex, rw.any.mMutex
   1.134  //     mHttp*                   none          rw.repo.none
   1.135  //
   1.136 -//   LLPhysicsDecomp:
   1.137 -//    
   1.138 -//     mRequestQ
   1.139 -//     mCurRequest
   1.140 -//     mCompletedQ
   1.141 -//
   1.142 -//
   1.143  // QA/Development Testing
   1.144  //
   1.145  //   Debug variable 'MeshUploadFakeErrors' takes a mask of bits that will
   1.146 @@ -230,15 +267,27 @@
   1.147  //                   locally-generated 500 status.
   1.148  //   0x08            As with 0x04 but for the upload operation.
   1.149  //
    1.150 +// *TODO:  Work list for follow-up actions:
    1.151 +//   * Review anything marked as unsafe above; verify whether the issues are real.
    1.152 +//   * See if we can put ::run() into a hard sleep.  It may not actually perform
    1.153 +//     better than the current polling scheme, so be prepared for disappointment.
    1.154 +//     You'll likely need a condition variable class that references a mutex in
    1.155 +//     its methods rather than deriving from one, which isn't correct (sketched below).
    1.156 +//   * On upload failures, make more information available to the alerting
    1.157 +//     dialog.  Surface the structured information currently written to the
    1.158 +//     log as a tree there.
    1.159 +//   * Header parse failures come without much explanation.  Elaborate.
    1.160 +//   * Need a final failure state for requests that are retried and just won't
    1.161 +//     complete.  We can fail a LOD request; the other request types cannot yet.
   1.162  
   1.163  LLMeshRepository gMeshRepo;
   1.164  
   1.165  const S32 MESH_HEADER_SIZE = 4096;                      // Important:  assumption is that headers fit in this space
   1.166 -const S32 REQUEST_HIGH_WATER_MIN = 32;
   1.167 -const S32 REQUEST_HIGH_WATER_MAX = 200;
   1.168 +const S32 REQUEST_HIGH_WATER_MIN = 32;					// Limits for GetMesh regions
   1.169 +const S32 REQUEST_HIGH_WATER_MAX = 150;					// Should remain under 2X throttle
   1.170  const S32 REQUEST_LOW_WATER_MIN = 16;
   1.171 -const S32 REQUEST_LOW_WATER_MAX = 100;
   1.172 -const S32 REQUEST2_HIGH_WATER_MIN = 32;
   1.173 +const S32 REQUEST_LOW_WATER_MAX = 75;
   1.174 +const S32 REQUEST2_HIGH_WATER_MIN = 32;					// Limits for GetMesh2 regions
   1.175  const S32 REQUEST2_HIGH_WATER_MAX = 80;
   1.176  const S32 REQUEST2_LOW_WATER_MIN = 16;
   1.177  const S32 REQUEST2_LOW_WATER_MAX = 40;
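
On the condition-variable bullet in the to-do list above: the request is for a
class that takes a mutex by reference in its methods rather than deriving from
a mutex class, so that a single mutex can guard both the queue state and the
sleep/wake test.  A minimal sketch over the standard library, with hypothetical
names (the viewer's own mutex and signal types differ):

    #include <condition_variable>
    #include <mutex>

    // Hypothetical WorkSignal: references the caller's mutex through the
    // unique_lock passed to wait(), instead of deriving from a mutex class.
    class WorkSignal
    {
        std::condition_variable mCond;
    public:
        template <typename Pred>
        void wait(std::unique_lock<std::mutex>& locked, Pred have_work)
        {
            mCond.wait(locked, have_work);  // atomically unlocks and sleeps
        }
        void notify() { mCond.notify_one(); }
    };

    // ::run() could then sleep hard until work is staged:
    //     std::unique_lock<std::mutex> lock(queue_mutex);
    //     signal.wait(lock, [&] { return !lod_queue.empty() || quitting; });
    // with loadMesh() and the HTTP completion handlers calling signal.notify()
    // after enqueuing, which avoids both polling and lost wakeups.
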
   1.178 @@ -269,7 +318,7 @@
   1.179  U32 LLMeshRepository::sCacheWrites = 0;
   1.180  U32 LLMeshRepository::sMaxLockHoldoffs = 0;
   1.181  
   1.182 -LLDeadmanTimer LLMeshRepository::sQuiescentTimer(15.0, true);	// true -> gather cpu metrics
    1.183 +LLDeadmanTimer LLMeshRepository::sQuiescentTimer(15.0, false);	// 2nd arg true would gather cpu metrics
   1.184  
   1.185  	
   1.186  static S32 dump_num = 0;
   1.187 @@ -703,7 +752,7 @@
   1.188  
   1.189  	while (!LLApp::isQuitting())
   1.190  	{
   1.191 -		// *TODO:  Revise sleep/wake strategy and try to move away'
   1.192 +		// *TODO:  Revise sleep/wake strategy and try to move away
   1.193  		// from polling operations in this thread.  We can sleep
   1.194  		// this thread hard when:
   1.195  		// * All Http requests are serviced
   1.196 @@ -714,7 +763,8 @@
   1.197  		// * Physics shape request queue empty
   1.198  		// We wake the thread when any of the above become untrue.
   1.199  		// Will likely need a correctly-implemented condition variable to do this.
   1.200 -
   1.201 +		// On the other hand, this may actually be an effective and efficient scheme...
   1.202 +		
   1.203  		mSignal->wait();
   1.204  
   1.205  		if (LLApp::isQuitting())
   1.206 @@ -810,7 +860,7 @@
   1.207  
   1.208  			// holding lock, try next list
   1.209  			// *TODO:  For UI/debug-oriented lists, we might drop the fine-
   1.210 -			// grained locking as there's lowered expectations of smoothness
   1.211 +			// grained locking as there's a lowered expectation of smoothness
   1.212  			// in these cases.
   1.213  			if (! mDecompositionRequests.empty() && mHttpRequestSet.size() < sRequestHighWater)
   1.214  			{
   1.215 @@ -2303,24 +2353,26 @@
   1.216  
   1.217  void LLMeshRepoThread::notifyLoadedMeshes()
   1.218  {
   1.219 +	bool update_metrics(false);
   1.220 +	
   1.221  	if (!mMutex)
   1.222  	{
   1.223  		return;
   1.224  	}
   1.225  
   1.226 -	if (!mLoadedQ.empty() || !mUnavailableQ.empty())
   1.227 -	{
   1.228 -		// Ping time-to-load metrics for mesh download operations.
   1.229 -		LLMeshRepository::metricsProgress(0);
   1.230 -	}
   1.231 -	
   1.232  	while (!mLoadedQ.empty())
   1.233  	{
   1.234  		mMutex->lock();
   1.235 +		if (mLoadedQ.empty())
   1.236 +		{
   1.237 +			mMutex->unlock();
   1.238 +			break;
   1.239 +		}
   1.240  		LoadedMesh mesh = mLoadedQ.front();
   1.241  		mLoadedQ.pop();
   1.242  		mMutex->unlock();
   1.243  		
   1.244 +		update_metrics = true;
   1.245  		if (mesh.mVolume && mesh.mVolume->getNumVolumeFaces() > 0)
   1.246  		{
   1.247  			gMeshRepo.notifyMeshLoaded(mesh.mMeshParams, mesh.mVolume);
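
The empty-recheck added inside the lock here is the point of this hunk: the
outer !mLoadedQ.empty() test runs unlocked, so the queue can drain between the
test and the lock acquisition.  Generalized (names hypothetical), the pattern is:

    #include <mutex>
    #include <queue>

    // Drain a queue shared with another thread: cheap unlocked peek, then
    // lock and re-check before popping, and process outside the lock.
    template <typename T, typename Fn>
    void drain_queue(std::queue<T>& q, std::mutex& m, Fn process)
    {
        while (!q.empty())          // unlocked peek; may be stale
        {
            m.lock();
            if (q.empty())          // re-check under the lock
            {
                m.unlock();
                break;
            }
            T item = q.front();
            q.pop();
            m.unlock();
            process(item);          // never call out while holding the lock
        }
    }

Note the unlocked peek is itself a racy read; the codebase accepts such reads
(see the table annotations above), but strictly conforming C++ would take the
lock before the first empty() test or track the size in an atomic.
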
   1.248 @@ -2335,10 +2387,17 @@
   1.249  	while (!mUnavailableQ.empty())
   1.250  	{
   1.251  		mMutex->lock();
   1.252 +		if (mUnavailableQ.empty())
   1.253 +		{
   1.254 +			mMutex->unlock();
   1.255 +			break;
   1.256 +		}
   1.257 +		
   1.258  		LODRequest req = mUnavailableQ.front();
   1.259  		mUnavailableQ.pop();
   1.260  		mMutex->unlock();
   1.261 -		
   1.262 +
   1.263 +		update_metrics = true;
   1.264  		gMeshRepo.notifyMeshUnavailable(req.mMeshParams, req.mLOD);
   1.265  	}
   1.266  
   1.267 @@ -2353,6 +2412,13 @@
   1.268  		gMeshRepo.notifyDecompositionReceived(mDecompositionQ.front());
   1.269  		mDecompositionQ.pop();
   1.270  	}
   1.271 +
   1.272 +	if (update_metrics)
   1.273 +	{
   1.274 +		// Ping time-to-load metrics for mesh download operations.
   1.275 +		LLMeshRepository::metricsProgress(0);
   1.276 +	}
   1.277 +	
   1.278  }
   1.279  
   1.280  S32 LLMeshRepoThread::getActualMeshLOD(const LLVolumeParams& mesh_params, S32 lod) 
   1.281 @@ -2461,6 +2527,12 @@
   1.282  		// speculative loads aren't done.
   1.283  		static const LLCore::HttpStatus par_status(HTTP_PARTIAL_CONTENT);
   1.284  
   1.285 +		if (par_status != status)
   1.286 +		{
   1.287 +			LL_WARNS_ONCE(LOG_MESH) << "Non-206 successful status received for fetch:  "
   1.288 +									<< status.toHex() << LL_ENDL;
   1.289 +		}
   1.290 +		
   1.291  		LLCore::BufferArray * body(response->getBody());
   1.292  		S32 data_size(body ? body->size() : 0);
   1.293  		U8 * data(NULL);
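
The new warning fires when a byte-range GET completes with a successful status
other than 206 Partial Content, typically a 200 from a server or proxy that
ignored the Range header and returned the whole object.  A hypothetical triage
of that case (the viewer's LLCore::HttpStatus wraps the same numeric codes):

    #include <cstddef>

    // Hypothetical triage of a ranged-GET response.  206 is the expected
    // status; both non-206 branches correspond to the warning above.  A 200
    // whose body exceeds the requested range length means range semantics
    // were ignored and the full object came back.
    enum class RangeResult { Partial, FullObject, Unexpected };

    RangeResult classify_ranged_get(int status, std::size_t body_size,
                                    std::size_t range_len)
    {
        if (status == 206) return RangeResult::Partial;
        if (status == 200 && body_size > range_len) return RangeResult::FullObject;
        return RangeResult::Unexpected;
    }
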
   1.294 @@ -2995,7 +3067,8 @@
   1.295  	}
   1.296  	else
   1.297  	{
   1.298 -		// GetMesh2 operation with keepalives, etc.
   1.299 +		// GetMesh2 operation with keepalives, etc.  With pipelining,
   1.300 +		// we'll increase this.
   1.301  		LLMeshRepoThread::sMaxConcurrentRequests = gSavedSettings.getU32("Mesh2MaxConcurrentRequests");
   1.302  		LLMeshRepoThread::sRequestHighWater = llclamp(5 * S32(LLMeshRepoThread::sMaxConcurrentRequests),
   1.303  													  REQUEST2_HIGH_WATER_MIN,
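
The high-water mark above is just 5x the Mesh2MaxConcurrentRequests setting
clamped into the REQUEST2 band.  A worked example with a hypothetical setting
of 8 (clamp_val mirrors llclamp's meaning):

    #include <algorithm>

    // clamp_val mirrors llclamp: constrain v to the range [lo, hi].
    template <typename T>
    T clamp_val(T v, T lo, T hi) { return std::min(std::max(v, lo), hi); }

    // Hypothetical setting of 8:  5 * 8 = 40, already inside [32, 80] -> 40.
    // A setting of 20 would hit the ceiling:  5 * 20 = 100 -> clamped to 80.
    const int high_water = clamp_val(5 * 8, 32, 80);    // == 40
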
   1.304 @@ -3083,7 +3156,10 @@
   1.305  	mDecompThread->notifyCompleted();
   1.306  
   1.307  	// For major operations, attempt to get the required locks
   1.308 -	// without blocking and punt if they're not available.
   1.309 +	// without blocking and punt if they're not available.  The
   1.310 +	// longest run of holdoffs is kept in sMaxLockHoldoffs just
   1.311 +	// to collect the data.  In testing, I've never seen a value
   1.312 +	// greater than 2 (written to log on exit).
   1.313  	{
   1.314  		LLMutexTrylock lock1(mMeshMutex);
   1.315  		LLMutexTrylock lock2(mThread->mMutex);
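
LLMutexTrylock, used just above, attempts the lock in its constructor without
blocking so the caller can punt the whole update to a later frame when either
lock is contended, counting consecutive punts toward sMaxLockHoldoffs.  A
standard-library sketch of the same idea (the class name and details here are
hypothetical):

    #include <mutex>

    // Hypothetical scoped try-lock in the spirit of LLMutexTrylock: the
    // constructor tries the lock without blocking; isLocked() tells the
    // caller whether to proceed or punt until the next frame.
    class ScopedTryLock
    {
        std::mutex* mMutex;
        bool mLocked;
    public:
        explicit ScopedTryLock(std::mutex* m)
            : mMutex(m), mLocked(m && m->try_lock()) {}
        ~ScopedTryLock() { if (mLocked) mMutex->unlock(); }
        bool isLocked() const { return mLocked; }
        ScopedTryLock(const ScopedTryLock&) = delete;
        ScopedTryLock& operator=(const ScopedTryLock&) = delete;
    };

    // Usage mirroring the punt-and-count pattern described above:
    //     ScopedTryLock l1(&mesh_mutex), l2(&thread_mutex);
    //     if (!l1.isLocked() || !l2.isLocked()) { ++holdoffs; return; }
    //     holdoffs = 0;   // got both locks; perform the major update
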
