SH-4411 Thread/mutex rework between main and worker thread

Tue, 06 Aug 2013 18:05:34 -0400

author
Monty Brandenberg <monty@lindenlab.com>
date
Tue, 06 Aug 2013 18:05:34 -0400
changeset 40704
1d61fa020f51
parent 40703
cbf70fcee978
child 40705
4daab1adb0c3

SH-4411 Thread/mutex rework between main and worker thread
Have the ::notifyLoadedMeshes() method do correct locking
and stall avoidance at the same time. This method now does
lazy mutex lock acquisition (trylock()) and, if it fails on
either mutex, it gives up and comes back later. Capture the
maximum number of sequential failures and report this at the
end of the run in the log. (So far, with big mesh regions, I've
only seen 1s and 2s.) Locking/mutex requirements sorted out in
other locations as well. LLMutex gets a trylock() method as
well as a new LLMutexTrylock scoped locking class. Clean up
some documentation, more to do.

indra/llcommon/llthread.cpp file | annotate | diff | revisions
indra/llcommon/llthread.h file | annotate | diff | revisions
indra/newview/llmeshrepository.cpp file | annotate | diff | revisions
indra/newview/llmeshrepository.h file | annotate | diff | revisions
     1.1 --- a/indra/llcommon/llthread.cpp	Mon Aug 05 19:04:08 2013 -0400
     1.2 +++ b/indra/llcommon/llthread.cpp	Tue Aug 06 18:05:34 2013 -0400
     1.3 @@ -3,7 +3,7 @@
     1.4   *
     1.5   * $LicenseInfo:firstyear=2004&license=viewerlgpl$
     1.6   * Second Life Viewer Source Code
     1.7 - * Copyright (C) 2010, Linden Research, Inc.
     1.8 + * Copyright (C) 2010-2013, Linden Research, Inc.
     1.9   * 
    1.10   * This library is free software; you can redistribute it and/or
    1.11   * modify it under the terms of the GNU Lesser General Public
    1.12 @@ -372,6 +372,36 @@
    1.13  #endif
    1.14  }
    1.15  
    1.16 +bool LLMutex::trylock()	// Non-blocking lock attempt; returns true only when the calling thread holds the mutex on return.
    1.17 +{
    1.18 +	if(isSelfLocked())
    1.19 +	{ // Already locked by this thread: count the extra hold in mCount instead of touching the APR mutex.
    1.20 +		mCount++;
    1.21 +		return true;
    1.22 +	}
    1.23 +	
    1.24 +	apr_status_t status(apr_thread_mutex_trylock(mAPRMutexp));
    1.25 +	if (status != APR_SUCCESS)
    1.26 +	{ // Busy (EBUSY) or any other failure: the lock was not acquired, so ownership must not be recorded.
    1.27 +		return false;
    1.28 +	}
    1.29 +	
    1.30 +#if MUTEX_DEBUG
    1.31 +	// Have to have the lock before we can access the debug info
    1.32 +	U32 id = LLThread::currentID();
    1.33 +	if (mIsLocked[id] != FALSE)
    1.34 +		llerrs << "Already locked in Thread: " << id << llendl;
    1.35 +	mIsLocked[id] = TRUE;
    1.36 +#endif
    1.37 +	// Lock acquired: record the calling thread as the locking thread.
    1.38 +#if LL_DARWIN
    1.39 +	mLockingThread = LLThread::currentID();
    1.40 +#else
    1.41 +	mLockingThread = sThreadID;
    1.42 +#endif
    1.43 +	return true;
    1.44 +}
    1.45 +
    1.46  void LLMutex::unlock()
    1.47  {
    1.48  	if (mCount > 0)
     2.1 --- a/indra/llcommon/llthread.h	Mon Aug 05 19:04:08 2013 -0400
     2.2 +++ b/indra/llcommon/llthread.h	Tue Aug 06 18:05:34 2013 -0400
     2.3 @@ -4,7 +4,7 @@
     2.4   *
     2.5   * $LicenseInfo:firstyear=2004&license=viewerlgpl$
     2.6   * Second Life Viewer Source Code
     2.7 - * Copyright (C) 2010, Linden Research, Inc.
     2.8 + * Copyright (C) 2010-2013, Linden Research, Inc.
     2.9   * 
    2.10   * This library is free software; you can redistribute it and/or
    2.11   * modify it under the terms of the GNU Lesser General Public
    2.12 @@ -156,7 +156,8 @@
    2.13  	virtual ~LLMutex();
    2.14  	
    2.15  	void lock();		// blocks
    2.16 -	void unlock();
    2.17 +	bool trylock();		// non-blocking, returns true if lock held.
    2.18 +	void unlock();		// undefined behavior when called on mutex not being held
    2.19  	bool isLocked(); 	// non-blocking, but does do a lock/unlock so not free
    2.20  	bool isSelfLocked(); //return true if locked in a same thread
    2.21  	U32 lockingThread() const; //get ID of locking thread
    2.22 @@ -174,6 +175,8 @@
    2.23  #endif
    2.24  };
    2.25  
    2.26 +//============================================================================
    2.27 +
    2.28  // Actually a condition/mutex pair (since each condition needs to be associated with a mutex).
    2.29  class LL_COMMON_API LLCondition : public LLMutex
    2.30  {
    2.31 @@ -189,6 +192,8 @@
    2.32  	apr_thread_cond_t *mAPRCondp;
    2.33  };
    2.34  
    2.35 +//============================================================================
    2.36 +
    2.37  class LLMutexLock
    2.38  {
    2.39  public:
    2.40 @@ -210,6 +215,43 @@
    2.41  
    2.42  //============================================================================
    2.43  
    2.44 +// Scoped locking class similar in function to LLMutexLock but uses
    2.45 +// the trylock() method to conditionally acquire lock without
    2.46 +// blocking.  Caller resolves the resulting condition by calling
    2.47 +// the isLocked() method and either punts or continues as indicated.
    2.48 +//
    2.49 +// Mostly of interest to callers needing to avoid stalls who can
    2.50 +// guarantee another attempt at a later time.
    2.51 +
    2.52 +class LLMutexTrylock
    2.53 +{
    2.54 +public:
    2.55 +	LLMutexTrylock(LLMutex* mutex)	// a NULL mutex is accepted; nothing is locked and isLocked() stays false
    2.56 +		: mMutex(mutex),
    2.57 +		  mLocked(false)
    2.58 +	{
    2.59 +		if (mMutex)
    2.60 +			mLocked = mMutex->trylock();
    2.61 +	}
    2.62 +
    2.63 +	~LLMutexTrylock()	// unlocks only when this object's own trylock() succeeded
    2.64 +	{
    2.65 +		if (mMutex && mLocked)
    2.66 +			mMutex->unlock();
    2.67 +	}
    2.68 +
    2.69 +	bool isLocked() const { return mLocked; }	// true when the constructor obtained the lock
    2.70 +	
    2.71 +private:
    2.72 +	// Not copyable: a copy would carry mLocked == true and unlock the mutex a second time.
    2.73 +	LLMutexTrylock(const LLMutexTrylock&);
    2.74 +	LLMutexTrylock& operator=(const LLMutexTrylock&);
    2.75 +	LLMutex*	mMutex;
    2.76 +	bool		mLocked;
    2.77 +};
    2.78 +
    2.79 +//============================================================================
    2.80 +
    2.81  void LLThread::lockData()
    2.82  {
    2.83  	mDataLock->lock();
     3.1 --- a/indra/newview/llmeshrepository.cpp	Mon Aug 05 19:04:08 2013 -0400
     3.2 +++ b/indra/newview/llmeshrepository.cpp	Tue Aug 06 18:05:34 2013 -0400
     3.3 @@ -197,7 +197,6 @@
     3.4  //     sActiveHeaderRequests    mMutex        rw.any.mMutex, ro.repo.none [1]
     3.5  //     sActiveLODRequests       mMutex        rw.any.mMutex, ro.repo.none [1]
     3.6  //     sMaxConcurrentRequests   mMutex        wo.main.none, ro.repo.none, ro.main.mMutex
     3.7 -//     mWaiting                 mMutex        rw.repo.none, ro.main.none [2] (race - hint)
     3.8  //     mMeshHeader              mHeaderMutex  rw.repo.mHeaderMutex, ro.main.mHeaderMutex, ro.main.none [0]
     3.9  //     mMeshHeaderSize          mHeaderMutex  rw.repo.mHeaderMutex
    3.10  //     mSkinRequests            none          rw.repo.none, rw.main.none [0]
    3.11 @@ -264,6 +263,7 @@
    3.12  U32 LLMeshRepository::sCacheBytesWritten = 0;
    3.13  U32 LLMeshRepository::sCacheReads = 0;
    3.14  U32 LLMeshRepository::sCacheWrites = 0;
    3.15 +U32 LLMeshRepository::sMaxLockHoldoffs = 0;
    3.16  
    3.17  LLDeadmanTimer LLMeshRepository::sQuiescentTimer(15.0, true);	// true -> gather cpu metrics
    3.18  
    3.19 @@ -288,7 +288,7 @@
    3.20  
    3.21  // Static data and functions to measure mesh load
    3.22  // time metrics for a new region scene.
    3.23 -static unsigned int metrics_teleport_start_count(0);
    3.24 +static unsigned int metrics_teleport_start_count = 0;
    3.25  boost::signals2::connection metrics_teleport_started_signal;
    3.26  static void teleport_started();
    3.27  static bool is_retryable(LLCore::HttpStatus status);
    3.28 @@ -396,6 +396,7 @@
    3.29  //     LLMeshSkinInfoHandler
    3.30  //     LLMeshDecompositionHandler
    3.31  //     LLMeshPhysicsShapeHandler
    3.32 +//   LLMeshUploadThread
    3.33  
    3.34  class LLMeshHandlerBase : public LLCore::HttpHandler
    3.35  {
    3.36 @@ -624,7 +625,6 @@
    3.37  
    3.38  LLMeshRepoThread::LLMeshRepoThread()
    3.39  : LLThread("mesh repo"),
    3.40 -  mWaiting(false),
    3.41    mHttpRequest(NULL),
    3.42    mHttpOptions(NULL),
    3.43    mHttpLargeOptions(NULL),
    3.44 @@ -654,6 +654,7 @@
    3.45  {
    3.46  	LL_INFOS(LOG_MESH) << "Small GETs issued:  " << LLMeshRepository::sHTTPRequestCount
    3.47  					   << ", Large GETs issued:  " << LLMeshRepository::sHTTPLargeRequestCount
    3.48 +					   << ", Max Lock Holdoffs:  " << LLMeshRepository::sMaxLockHoldoffs
    3.49  					   << LL_ENDL;
    3.50  
    3.51  	for (http_request_set::iterator iter(mHttpRequestSet.begin());
    3.52 @@ -698,138 +699,171 @@
    3.53  
    3.54  	while (!LLApp::isQuitting())
    3.55  	{
    3.56 +		// *TODO:  Revise sleep/wake strategy and try to move away
    3.57 +		// from polling operations in this thread.  We can sleep
    3.58 +		// this thread hard when:
    3.59 +		// * All Http requests are serviced
    3.60 +		// * LOD request queue empty
    3.61 +		// * Header request queue empty
    3.62 +		// * Skin info request queue empty
    3.63 +		// * Decomposition request queue empty
    3.64 +		// * Physics shape request queue empty
    3.65 +		// We wake the thread when any of the above become untrue.
    3.66 +		// Will likely need a correctly-implemented condition variable to do this.
    3.67 +
    3.68 +		mSignal->wait();
    3.69 +
    3.70 +		if (LLApp::isQuitting())
    3.71 +		{
    3.72 +			break;
    3.73 +		}
    3.74 +		
    3.75  		if (! mHttpRequestSet.empty())
    3.76  		{
    3.77  			// Dispatch all HttpHandler notifications
    3.78  			mHttpRequest->update(0L);
    3.79  		}
    3.80 -
    3.81 -		mWaiting = true;
    3.82 -		mSignal->wait();
    3.83 -		mWaiting = false;
    3.84 -		
    3.85 -		if (! LLApp::isQuitting())
    3.86 +		sRequestWaterLevel = mHttpRequestSet.size();			// Stats data update
    3.87 +			
    3.88 +		// NOTE: order of queue processing intentionally favors LOD requests over header requests
    3.89 +
    3.90 +		while (!mLODReqQ.empty() && mHttpRequestSet.size() < sRequestHighWater)
    3.91  		{
    3.92 -			// NOTE: order of queue processing intentionally favors LOD requests over header requests
    3.93 -
    3.94 -			sRequestWaterLevel = mHttpRequestSet.size();
    3.95 -			while (!mLODReqQ.empty() && mHttpRequestSet.size() < sRequestHighWater)
    3.96 +			if (! mMutex)
    3.97  			{
    3.98 -				if (! mMutex)
    3.99 +				break;
   3.100 +			}
   3.101 +			mMutex->lock();
   3.102 +			LODRequest req = mLODReqQ.front();
   3.103 +			mLODReqQ.pop();
   3.104 +			LLMeshRepository::sLODProcessing--;
   3.105 +			mMutex->unlock();
   3.106 +			if (!fetchMeshLOD(req.mMeshParams, req.mLOD))//failed, resubmit
   3.107 +			{
   3.108 +				mMutex->lock();
   3.109 +				mLODReqQ.push(req) ; 
   3.110 +				++LLMeshRepository::sLODProcessing;
   3.111 +				mMutex->unlock();
   3.112 +			}
   3.113 +		}
   3.114 +
   3.115 +		while (!mHeaderReqQ.empty() && mHttpRequestSet.size() < sRequestHighWater)
   3.116 +		{
   3.117 +			if (! mMutex)
   3.118 +			{
   3.119 +				break;
   3.120 +			}
   3.121 +			mMutex->lock();
   3.122 +			HeaderRequest req = mHeaderReqQ.front();
   3.123 +			mHeaderReqQ.pop();
   3.124 +			mMutex->unlock();
   3.125 +			if (!fetchMeshHeader(req.mMeshParams))//failed, resubmit
   3.126 +			{
   3.127 +				mMutex->lock();
   3.128 +				mHeaderReqQ.push(req) ;
   3.129 +				mMutex->unlock();
   3.130 +			}
   3.131 +		}
   3.132 +
   3.133 +		// For the final three request lists, similar goal to above but
   3.134 +		// slightly different queue structures.  Stay off the mutex when
   3.135 +		// performing long-duration actions.
   3.136 +
   3.137 +		if (mHttpRequestSet.size() < sRequestHighWater
   3.138 +			&& (! mSkinRequests.empty()
   3.139 +				|| ! mDecompositionRequests.empty()
   3.140 +				|| ! mPhysicsShapeRequests.empty()))
   3.141 +		{
   3.142 +			// Something to do probably, lock and double-check.  We don't want
   3.143 +			// to hold the lock long here.  That will stall main thread activities
   3.144 +			// so we bounce it.
   3.145 +
   3.146 +			mMutex->lock();
   3.147 +			if (! mSkinRequests.empty() && mHttpRequestSet.size() < sRequestHighWater)
   3.148 +			{
   3.149 +				std::set<LLUUID> incomplete;
   3.150 +				std::set<LLUUID>::iterator iter(mSkinRequests.begin());
   3.151 +				while (iter != mSkinRequests.end() && mHttpRequestSet.size() < sRequestHighWater)
   3.152  				{
   3.153 -					break;
   3.154 +					LLUUID mesh_id = *iter;
   3.155 +					mSkinRequests.erase(iter);
   3.156 +					mMutex->unlock();
   3.157 +
   3.158 +					if (! fetchMeshSkinInfo(mesh_id))
   3.159 +					{
   3.160 +						incomplete.insert(mesh_id);
   3.161 +					}
   3.162 +
   3.163 +					mMutex->lock();
   3.164 +					iter = mSkinRequests.begin();
   3.165  				}
   3.166 -				mMutex->lock();
   3.167 -				LODRequest req = mLODReqQ.front();
   3.168 -				mLODReqQ.pop();
   3.169 -				LLMeshRepository::sLODProcessing--;
   3.170 -				mMutex->unlock();
   3.171 -				if (!fetchMeshLOD(req.mMeshParams, req.mLOD))//failed, resubmit
   3.172 +
   3.173 +				if (! incomplete.empty())
   3.174  				{
   3.175 -					mMutex->lock();
   3.176 -					mLODReqQ.push(req) ; 
   3.177 -					++LLMeshRepository::sLODProcessing;
   3.178 -					mMutex->unlock();
   3.179 +					mSkinRequests.insert(incomplete.begin(), incomplete.end());
   3.180  				}
   3.181  			}
   3.182  
   3.183 -			while (!mHeaderReqQ.empty() && mHttpRequestSet.size() < sRequestHighWater)
   3.184 -			{
   3.185 -				if (! mMutex)
   3.186 -				{
   3.187 -					break;
   3.188 -				}
   3.189 -				mMutex->lock();
   3.190 -				HeaderRequest req = mHeaderReqQ.front();
   3.191 -				mHeaderReqQ.pop();
   3.192 -				mMutex->unlock();
   3.193 -				if (!fetchMeshHeader(req.mMeshParams))//failed, resubmit
   3.194 -				{
   3.195 -					mMutex->lock();
   3.196 -					mHeaderReqQ.push(req) ;
   3.197 -					mMutex->unlock();
   3.198 -				}
   3.199 -			}
   3.200 -
   3.201 -			// For the final three request lists, if we scan any part of one
   3.202 -			// list, we scan the entire thing.  This gets us through any requests
   3.203 -			// which can be resolved in the cache.  It also keeps the request
   3.204 -			// set somewhat fresher otherwise items at the end of the set
   3.205 -			// order will lose.
   3.206 -			if (! mSkinRequests.empty() && mHttpRequestSet.size() < sRequestHighWater)
   3.207 -			{
   3.208 -				// *FIXME:  this really does need a lock as do the following ones
   3.209 -				std::set<LLUUID> incomplete;
   3.210 -				for (std::set<LLUUID>::iterator iter = mSkinRequests.begin(); iter != mSkinRequests.end(); ++iter)
   3.211 -				{
   3.212 -					if (mHttpRequestSet.size() < sRequestHighWater)
   3.213 -					{
   3.214 -						LLUUID mesh_id = *iter;
   3.215 -						if (!fetchMeshSkinInfo(mesh_id))
   3.216 -						{
   3.217 -							incomplete.insert(mesh_id);
   3.218 -						}
   3.219 -					}
   3.220 -					else
   3.221 -					{
   3.222 -						// Hit high-water mark, copy remaining to incomplete.
   3.223 -						incomplete.insert(iter, mSkinRequests.end());
   3.224 -						break;
   3.225 -					}
   3.226 -				}
   3.227 -				mSkinRequests.swap(incomplete);
   3.228 -			}
   3.229 -
   3.230 +			// holding lock, try next list
   3.231 +			// *TODO:  For UI/debug-oriented lists, we might drop the fine-
   3.232 +			// grained locking as there's lowered expectations of smoothness
   3.233 +			// in these cases.
   3.234  			if (! mDecompositionRequests.empty() && mHttpRequestSet.size() < sRequestHighWater)
   3.235  			{
   3.236  				std::set<LLUUID> incomplete;
   3.237 -				for (std::set<LLUUID>::iterator iter = mDecompositionRequests.begin(); iter != mDecompositionRequests.end(); ++iter)
   3.238 +				std::set<LLUUID>::iterator iter(mDecompositionRequests.begin());
   3.239 +				while (iter != mDecompositionRequests.end() && mHttpRequestSet.size() < sRequestHighWater)
   3.240  				{
   3.241 -					if (mHttpRequestSet.size() < sRequestHighWater)
   3.242 +					LLUUID mesh_id = *iter;
   3.243 +					mDecompositionRequests.erase(iter);
   3.244 +					mMutex->unlock();
   3.245 +					
   3.246 +					if (! fetchMeshDecomposition(mesh_id))
   3.247  					{
   3.248 -						LLUUID mesh_id = *iter;
   3.249 -						if (!fetchMeshDecomposition(mesh_id))
   3.250 -						{
   3.251 -							incomplete.insert(mesh_id);
   3.252 -						}
   3.253 +						incomplete.insert(mesh_id);
   3.254  					}
   3.255 -					else
   3.256 -					{
   3.257 -						// Hit high-water mark, copy remaining to incomplete.
   3.258 -						incomplete.insert(iter, mDecompositionRequests.end());
   3.259 -						break;
   3.260 -					}
   3.261 +
   3.262 +					mMutex->lock();
   3.263 +					iter = mDecompositionRequests.begin();
   3.264  				}
   3.265 -				mDecompositionRequests.swap(incomplete);
   3.266 +
   3.267 +				if (! incomplete.empty())
   3.268 +				{
   3.269 +					mDecompositionRequests.insert(incomplete.begin(), incomplete.end());
   3.270 +				}
   3.271  			}
   3.272  
   3.273 +			// holding lock, final list
   3.274  			if (! mPhysicsShapeRequests.empty() && mHttpRequestSet.size() < sRequestHighWater)
   3.275  			{
   3.276  				std::set<LLUUID> incomplete;
   3.277 -				for (std::set<LLUUID>::iterator iter = mPhysicsShapeRequests.begin(); iter != mPhysicsShapeRequests.end(); ++iter)
   3.278 +				std::set<LLUUID>::iterator iter(mPhysicsShapeRequests.begin());
   3.279 +				while (iter != mPhysicsShapeRequests.end() && mHttpRequestSet.size() < sRequestHighWater)
   3.280  				{
   3.281 -					if (mHttpRequestSet.size() < sRequestHighWater)
   3.282 +					LLUUID mesh_id = *iter;
   3.283 +					mPhysicsShapeRequests.erase(iter);
   3.284 +					mMutex->unlock();
   3.285 +					
   3.286 +					if (! fetchMeshPhysicsShape(mesh_id))
   3.287  					{
   3.288 -						LLUUID mesh_id = *iter;
   3.289 -						if (!fetchMeshPhysicsShape(mesh_id))
   3.290 -						{
   3.291 -							incomplete.insert(mesh_id);
   3.292 -						}
   3.293 +						incomplete.insert(mesh_id);
   3.294  					}
   3.295 -					else
   3.296 -					{
   3.297 -						// Hit high-water mark, copy remaining to incomplete.
   3.298 -						incomplete.insert(iter, mPhysicsShapeRequests.end());
   3.299 -						break;
   3.300 -					}
   3.301 +
   3.302 +					mMutex->lock();
   3.303 +					iter = mPhysicsShapeRequests.begin();
   3.304  				}
   3.305 -				mPhysicsShapeRequests.swap(incomplete);
   3.306 +
   3.307 +				if (! incomplete.empty())
   3.308 +				{
   3.309 +					mPhysicsShapeRequests.insert(incomplete.begin(), incomplete.end());
   3.310 +				}
   3.311  			}
   3.312 -
   3.313 -			// For dev purposes, a dynamic change could make this false
   3.314 -			// and that shouldn't assert.
   3.315 -			// llassert_always(mHttpRequestSet.size() <= sRequestHighWater);
   3.316 +			mMutex->unlock();
   3.317  		}
   3.318 +
   3.319 +		// For dev purposes only.  A dynamic change could make this false
   3.320 +		// and that shouldn't assert.
   3.321 +		// llassert_always(mHttpRequestSet.size() <= sRequestHighWater);
   3.322  	}
   3.323  	
   3.324  	if (mSignal->isLocked())
   3.325 @@ -844,18 +878,21 @@
   3.326  	}
   3.327  }
   3.328  
   3.329 +// Mutex:  LLMeshRepoThread::mMutex must be held on entry
   3.330  void LLMeshRepoThread::loadMeshSkinInfo(const LLUUID& mesh_id)
   3.331 -{ //protected by mSignal, no locking needed here
   3.332 +{
   3.333  	mSkinRequests.insert(mesh_id);
   3.334  }
   3.335  
   3.336 +// Mutex:  LLMeshRepoThread::mMutex must be held on entry
   3.337  void LLMeshRepoThread::loadMeshDecomposition(const LLUUID& mesh_id)
   3.338 -{ //protected by mSignal, no locking needed here
   3.339 +{
   3.340  	mDecompositionRequests.insert(mesh_id);
   3.341  }
   3.342  
   3.343 +// Mutex:  LLMeshRepoThread::mMutex must be held on entry
   3.344  void LLMeshRepoThread::loadMeshPhysicsShape(const LLUUID& mesh_id)
   3.345 -{ //protected by mSignal, no locking needed here
   3.346 +{
   3.347  	mPhysicsShapeRequests.insert(mesh_id);
   3.348  }
   3.349  
   3.350 @@ -2406,13 +2443,18 @@
   3.351  	}
   3.352  	else
   3.353  	{
   3.354 -		// From texture fetch code and applies here:
   3.355 +		// From texture fetch code and may apply here:
   3.356  		//
   3.357  		// A warning about partial (HTTP 206) data.  Some grid services
   3.358  		// do *not* return a 'Content-Range' header in the response to
   3.359  		// Range requests with a 206 status.  We're forced to assume
   3.360  		// we get what we asked for in these cases until we can fix
   3.361  		// the services.
   3.362 +		//
   3.363 +		// May also need to deal with 200 status (full asset returned
   3.364 +		// rather than partial) and 416 (request completely unsatisfiable).
   3.365 +		// We have always been exposed to these, but they are less likely
   3.366 +		// here, where speculative loads aren't done.
   3.367  		static const LLCore::HttpStatus par_status(HTTP_PARTIAL_CONTENT);
   3.368  
   3.369  		LLCore::BufferArray * body(response->getBody());
   3.370 @@ -2422,7 +2464,9 @@
   3.371  		if (data_size > 0)
   3.372  		{
   3.373  			// *TODO: Try to get rid of data copying and add interfaces
   3.374 -			// that support BufferArray directly.
   3.375 +			// that support BufferArray directly.  Introduce a two-phase
   3.376 +			// handler, optional first that takes a body, fallback second
   3.377 +			// that requires a temporary allocation and data copy.
   3.378  			data = new U8[data_size];
   3.379  			body->read(0, (char *) data, data_size);
   3.380  			LLMeshRepository::sBytesReceived += data_size;
   3.381 @@ -2459,6 +2503,10 @@
   3.382  {
   3.383  	if (is_retryable(status))
   3.384  	{
   3.385 +		// *TODO:  This and the other processFailure() methods should
   3.386 +		// probably just fail hard (as llcorehttp has done the retries).
   3.387 +		// Or we could implement a slow/forever retry class.
   3.388 +		
   3.389  		LL_WARNS(LOG_MESH) << "Error during mesh header handling.  Reason:  " << status.toString()
   3.390  						   << " (" << status.toHex() << ").  Retrying."
   3.391  						   << LL_ENDL;
   3.392 @@ -3026,32 +3074,40 @@
   3.393  
   3.394  	//call completed callbacks on finished decompositions
   3.395  	mDecompThread->notifyCompleted();
   3.396 -	
   3.397 -	if (!mThread->mWaiting && mPendingRequests.empty())
   3.398 -	{ //curl thread is churning, wait for it to go idle
   3.399 -		return;
   3.400 -	}
   3.401 -
   3.402 -	static std::string region_name("never name a region this");
   3.403 -
   3.404 -	if (gAgent.getRegion())
   3.405 -	{ //update capability url 
   3.406 -		if (gAgent.getRegion()->getName() != region_name && gAgent.getRegion()->capabilitiesReceived())
   3.407 +
   3.408 +	// For major operations, attempt to get the required locks
   3.409 +	// without blocking and punt if they're not available.
   3.410 +	{
   3.411 +		LLMutexTrylock lock1(mMeshMutex);
   3.412 +		LLMutexTrylock lock2(mThread->mMutex);
   3.413 +
   3.414 +		static U32 hold_offs(0);
   3.415 +		if (! lock1.isLocked() || ! lock2.isLocked())
   3.416  		{
   3.417 -			region_name = gAgent.getRegion()->getName();
   3.418 -			mGetMeshCapability = gAgent.getRegion()->getCapability("GetMesh");
   3.419 -			mGetMesh2Capability = gAgent.getRegion()->getCapability("GetMesh2");
   3.420 -			mGetMeshVersion = mGetMesh2Capability.empty() ? 1 : 2;
   3.421 -			LL_DEBUGS(LOG_MESH) << "Retrieving caps for region '" << region_name
   3.422 -								<< "', GetMesh2:  " << mGetMesh2Capability
   3.423 -								<< ", GetMesh:  " << mGetMeshCapability
   3.424 -								<< LL_ENDL;
   3.425 +			// If we can't get the locks, skip and pick this up later.
   3.426 +			++hold_offs;
   3.427 +			sMaxLockHoldoffs = llmax(sMaxLockHoldoffs, hold_offs);
   3.428 +			return;
   3.429  		}
   3.430 -	}
   3.431 -
   3.432 -	{
   3.433 -		LLMutexLock lock1(mMeshMutex);
   3.434 -		LLMutexLock lock2(mThread->mMutex);
   3.435 +		hold_offs = 0;
   3.436 +		
   3.437 +		if (gAgent.getRegion())
   3.438 +		{
   3.439 +			// Update capability urls
   3.440 +			static std::string region_name("never name a region this");
   3.441 +			
   3.442 +			if (gAgent.getRegion()->getName() != region_name && gAgent.getRegion()->capabilitiesReceived())
   3.443 +			{
   3.444 +				region_name = gAgent.getRegion()->getName();
   3.445 +				mGetMeshCapability = gAgent.getRegion()->getCapability("GetMesh");
   3.446 +				mGetMesh2Capability = gAgent.getRegion()->getCapability("GetMesh2");
   3.447 +				mGetMeshVersion = mGetMesh2Capability.empty() ? 1 : 2;
   3.448 +				LL_DEBUGS(LOG_MESH) << "Retrieving caps for region '" << region_name
   3.449 +									<< "', GetMesh2:  " << mGetMesh2Capability
   3.450 +									<< ", GetMesh:  " << mGetMeshCapability
   3.451 +									<< LL_ENDL;
   3.452 +			}
   3.453 +		}
   3.454  		
   3.455  		//popup queued error messages from background threads
   3.456  		while (!mUploadErrorQ.empty())
     4.1 --- a/indra/newview/llmeshrepository.h	Mon Aug 05 19:04:08 2013 -0400
     4.2 +++ b/indra/newview/llmeshrepository.h	Tue Aug 06 18:05:34 2013 -0400
     4.3 @@ -230,8 +230,6 @@
     4.4  	LLMutex*	mHeaderMutex;
     4.5  	LLCondition* mSignal;
     4.6  
     4.7 -	volatile bool mWaiting;
     4.8 -
     4.9  	//map of known mesh headers
    4.10  	typedef std::map<LLUUID, LLSD> mesh_header_map;
    4.11  	mesh_header_map mMeshHeader;
    4.12 @@ -494,19 +492,20 @@
    4.13  
    4.14  	//metrics
    4.15  	static U32 sBytesReceived;
    4.16 -	static U32 sMeshRequestCount;
    4.17 -	static U32 sHTTPRequestCount;
    4.18 -	static U32 sHTTPLargeRequestCount;
    4.19 -	static U32 sHTTPRetryCount;
    4.20 -	static U32 sHTTPErrorCount;
    4.21 +	static U32 sMeshRequestCount;				// Total request count, http or cached, all component types
    4.22 +	static U32 sHTTPRequestCount;				// Http GETs issued (not large)
    4.23 +	static U32 sHTTPLargeRequestCount;			// Http GETs issued for large requests
    4.24 +	static U32 sHTTPRetryCount;					// Total request retries whether successful or failed
    4.25 +	static U32 sHTTPErrorCount;					// Requests ending in error
    4.26  	static U32 sLODPending;
    4.27  	static U32 sLODProcessing;
    4.28  	static U32 sCacheBytesRead;
    4.29  	static U32 sCacheBytesWritten;
    4.30 -	static U32 sCacheReads;
    4.31 +	static U32 sCacheReads;						
    4.32  	static U32 sCacheWrites;
    4.33 +	static U32 sMaxLockHoldoffs;				// Maximum sequential locking failures
    4.34  	
    4.35 -	static LLDeadmanTimer sQuiescentTimer;  // time-to-complete-mesh-downloads after significant events
    4.36 +	static LLDeadmanTimer sQuiescentTimer;		// Time-to-complete-mesh-downloads after significant events
    4.37  
    4.38  	static F32 getStreamingCost(LLSD& header, F32 radius, S32* bytes = NULL, S32* visible_bytes = NULL, S32 detail = -1, F32 *unscaled_value = NULL);
    4.39  

mercurial