indra/llcommon/llstring.cpp

Thu, 14 Jul 2011 17:34:37 -0700

author
Tank_Master
date
Thu, 14 Jul 2011 17:34:37 -0700
changeset 1628
907de988cd77
parent 1014
de6dfa8b8e78
child 1638
bd909582d0bf
permissions
-rw-r--r--

build with 2010 support (code changes still needed)

     1 /** 
     2  * @file llstring.cpp
     3  * @brief String utility functions and the std::string class.
     4  *
     5  * $LicenseInfo:firstyear=2001&license=viewergpl$
     6  * 
     7  * Copyright (c) 2001-2009, Linden Research, Inc.
     8  * 
     9  * Second Life Viewer Source Code
    10  * The source code in this file ("Source Code") is provided by Linden Lab
    11  * to you under the terms of the GNU General Public License, version 2.0
    12  * ("GPL"), unless you have obtained a separate licensing agreement
    13  * ("Other License"), formally executed by you and Linden Lab.  Terms of
    14  * the GPL can be found in doc/GPL-license.txt in this distribution, or
    15  * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
    16  * 
    17  * There are special exceptions to the terms and conditions of the GPL as
    18  * it is applied to this Source Code. View the full text of the exception
    19  * in the file doc/FLOSS-exception.txt in this software distribution, or
    20  * online at
    21  * http://secondlifegrid.net/programs/open_source/licensing/flossexception
    22  * 
    23  * By copying, modifying or distributing this software, you acknowledge
    24  * that you have read and understood your obligations described above,
    25  * and agree to abide by those obligations.
    26  * 
    27  * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
    28  * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
    29  * COMPLETENESS OR PERFORMANCE.
    30  * $/LicenseInfo$
    31  */
    33 #include "linden_common.h"
    35 #include "llstring.h"
    36 #include "llerror.h"
    38 #if LL_WINDOWS
    39 #define WIN32_LEAN_AND_MEAN
    40 #include <winsock2.h>
    41 #include <windows.h>
    42 #include <winnls.h> // for WideCharToMultiByte
    43 #endif
    45 std::string ll_safe_string(const char* in)
    46 {
    47 	if(in) return std::string(in);
    48 	return std::string();
    49 }
    51 std::string ll_safe_string(const char* in, S32 maxlen)
    52 {
    53 	if(in && maxlen > 0) return std::string(in, maxlen);
    54 	return std::string();
    55 }
    57 U8 hex_as_nybble(char hex)
    58 {
    59 	if((hex >= '0') && (hex <= '9'))
    60 	{
    61 		return (U8)(hex - '0');
    62 	}
    63 	else if((hex >= 'a') && (hex <='f'))
    64 	{
    65 		return (U8)(10 + hex - 'a');
    66 	}
    67 	else if((hex >= 'A') && (hex <='F'))
    68 	{
    69 		return (U8)(10 + hex - 'A');
    70 	}
    71 	return 0; // uh - oh, not hex any more...
    72 }
    75 bool _read_file_into_string(std::string& str, const std::string& filename)
    76 {
    77 	llifstream ifs(filename, llifstream::binary);
    78 	if (!ifs.is_open())
    79 	{
    80 		llinfos << "Unable to open file " << filename << llendl;
    81 		return false;
    82 	}
    84 	std::ostringstream oss;
    86 	oss << ifs.rdbuf();
    87 	str = oss.str();
    88 	ifs.close();
    89 	return true;
    90 }
    95 // See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
    96 // for the Unicode implementation - this doesn't match because it was written before finding
    97 // it.
   100 std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
   101 {
   102 	std::string utf8_str = wstring_to_utf8str(wstr);
   103 	s << utf8_str;
   104 	return s;
   105 }
   107 std::string rawstr_to_utf8(const std::string& raw)
   108 {
   109 	LLWString wstr(utf8str_to_wstring(raw));
   110 	return wstring_to_utf8str(wstr);
   111 }
   113 S32 wchar_to_utf8chars(llwchar in_char, char* outchars)
   114 {
   115 	U32 cur_char = (U32)in_char;
   116 	char* base = outchars;
   117 	if (cur_char < 0x80)
   118 	{
   119 		*outchars++ = (U8)cur_char;
   120 	}
   121 	else if (cur_char < 0x800)
   122 	{
   123 		*outchars++ = 0xC0 | (cur_char >> 6);
   124 		*outchars++ = 0x80 | (cur_char & 0x3F);
   125 	}
   126 	else if (cur_char < 0x10000)
   127 	{
   128 		*outchars++ = 0xE0 | (cur_char >> 12);
   129 		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
   130 		*outchars++ = 0x80 | (cur_char & 0x3F);
   131 	}
   132 	else if (cur_char < 0x200000)
   133 	{
   134 		*outchars++ = 0xF0 | (cur_char >> 18);
   135 		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
   136 		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
   137 		*outchars++ = 0x80 | (cur_char & 0x3F);
   138 	}
   139 	else if (cur_char < 0x4000000)
   140 	{
   141 		*outchars++ = 0xF8 | (cur_char >> 24);
   142 		*outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
   143 		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
   144 		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
   145 		*outchars++ = 0x80 | (cur_char & 0x3F);
   146 	}
   147 	else if (cur_char < 0x80000000)
   148 	{
   149 		*outchars++ = 0xFC | (cur_char >> 30);
   150 		*outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
   151 		*outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
   152 		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
   153 		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
   154 		*outchars++ = 0x80 | (cur_char & 0x3F);
   155 	}
   156 	else
   157 	{
   158 		llwarns << "Invalid Unicode character " << cur_char << "!" << llendl;
   159 		*outchars++ = LL_UNKNOWN_CHAR;
   160 	}
   161 	return outchars - base;
   162 }	
   164 S32 utf16chars_to_wchar(const wchar_t* inchars, llwchar* outchar)
   165 {
   166 	const wchar_t* base = inchars;
   167 	wchar_t cur_char = *inchars++;
   168 	llwchar char32 = cur_char;
   169 	if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
   170 	{
   171 		// Surrogates
   172 		char32 = ((llwchar)(cur_char - 0xD800)) << 10;
   173 		cur_char = *inchars++;
   174 		char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
   175 	}
   176 	else
   177 	{
   178 		char32 = (llwchar)cur_char;
   179 	}
   180 	*outchar = char32;
   181 	return inchars - base;
   182 }
   184 llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
   185 {
   186 	llutf16string out;
   188 	S32 i = 0;
   189 	while (i < len)
   190 	{
   191 		U32 cur_char = utf32str[i];
   192 		if (cur_char > 0xFFFF)
   193 		{
   194 			out += (0xD7C0 + (cur_char >> 10));
   195 			out += (0xDC00 | (cur_char & 0x3FF));
   196 		}
   197 		else
   198 		{
   199 			out += cur_char;
   200 		}
   201 		i++;
   202 	}
   203 	return out;
   204 }
   206 llutf16string wstring_to_utf16str(const LLWString &utf32str)
   207 {
   208 	const S32 len = (S32)utf32str.length();
   209 	return wstring_to_utf16str(utf32str, len);
   210 }
   212 llutf16string utf8str_to_utf16str ( const std::string& utf8str )
   213 {
   214 	LLWString wstr = utf8str_to_wstring ( utf8str );
   215 	return wstring_to_utf16str ( wstr );
   216 }
   219 LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
   220 {
   221 	LLWString wout;
   222 	if((len <= 0) || utf16str.empty()) return wout;
   224 	S32 i = 0;
   225 	// craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
   226 	const wchar_t* chars16 = &(*(utf16str.begin()));
   227 	while (i < len)
   228 	{
   229 		llwchar cur_char;
   230 		i += utf16chars_to_wchar(chars16+i, &cur_char);
   231 		wout += cur_char;
   232 	}
   233 	return wout;
   234 }
   236 LLWString utf16str_to_wstring(const llutf16string &utf16str)
   237 {
   238 	const S32 len = (S32)utf16str.length();
   239 	return utf16str_to_wstring(utf16str, len);
   240 }
   242 // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
   243 S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
   244 {
   245 	S32 surrogate_pairs = 0;
   246 	// ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
   247 	const wchar_t *const utf16_chars = &(*(utf16str.begin()));
   248 	S32 i = 0;
   249 	while (i < utf16_len)
   250 	{
   251 		const wchar_t c = utf16_chars[i++];
   252 		if (c >= 0xD800 && c <= 0xDBFF)		// See http://en.wikipedia.org/wiki/UTF-16
   253 		{   // Have first byte of a surrogate pair
   254 			if (i >= utf16_len)
   255 			{
   256 				break;
   257 			}
   258 			const wchar_t d = utf16_chars[i];
   259 			if (d >= 0xDC00 && d <= 0xDFFF)
   260 			{   // Have valid second byte of a surrogate pair
   261 				surrogate_pairs++;
   262 				i++;
   263 			}
   264 		}
   265 	}
   266 	return utf16_len - surrogate_pairs;
   267 }
   269 // Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
   270 S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
   271 {
   272 	const S32 end = llmin((S32)wstr.length(), woffset + wlen);
   273 	if (end < woffset)
   274 	{
   275 		return 0;
   276 	}
   277 	else
   278 	{
   279 		S32 length = end - woffset;
   280 		for (S32 i = woffset; i < end; i++)
   281 		{
   282 			if (wstr[i] >= 0x10000)
   283 			{
   284 				length++;
   285 			}
   286 		}
   287 		return length;
   288 	}
   289 }
   291 // Given a wstring and an offset in it, returns the length as wstring (i.e.,
   292 // number of llwchars) of the longest substring that starts at the offset
   293 // and whose equivalent utf-16 string does not exceeds the given utf16_length.
   294 S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned)
   295 {
   296 	const S32 end = wstr.length();
   297 	BOOL u = FALSE;
   298 	S32 n = woffset + utf16_length;
   299 	S32 i = woffset;
   300 	while (i < end)
   301 	{
   302 		if (wstr[i] >= 0x10000)
   303 		{
   304 			--n;
   305 		}
   306 		if (i >= n)
   307 		{
   308 			u = (i > n);
   309 			break;
   310 		}
   311 		i++;
   312 	}
   313 	if (unaligned)
   314 	{
   315 		*unaligned = u;
   316 	}
   317 	return i - woffset;
   318 }
   320 S32 wchar_utf8_length(const llwchar wc)
   321 {
   322 	if (wc < 0x80)
   323 	{
   324 		// This case will also catch negative values which are
   325 		// technically invalid.
   326 		return 1;
   327 	}
   328 	else if (wc < 0x800)
   329 	{
   330 		return 2;
   331 	}
   332 	else if (wc < 0x10000)
   333 	{
   334 		return 3;
   335 	}
   336 	else if (wc < 0x200000)
   337 	{
   338 		return 4;
   339 	}
   340 	else if (wc < 0x4000000)
   341 	{
   342 		return 5;
   343 	}
   344 	else
   345 	{
   346 		return 6;
   347 	}
   348 }
   351 S32 wstring_utf8_length(const LLWString& wstr)
   352 {
   353 	S32 len = 0;
   354 	for (S32 i = 0; i < (S32)wstr.length(); i++)
   355 	{
   356 		len += wchar_utf8_length(wstr[i]);
   357 	}
   358 	return len;
   359 }
   362 LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
   363 {
   364 	LLWString wout;
   366 	S32 i = 0;
   367 	while (i < len)
   368 	{
   369 		llwchar unichar;
   370 		U8 cur_char = utf8str[i];
   372 		if (cur_char < 0x80)
   373 		{
   374 			// Ascii character, just add it
   375 			unichar = cur_char;
   376 		}
   377 		else
   378 		{
   379 			S32 cont_bytes = 0;
   380 			if ((cur_char >> 5) == 0x6)			// Two byte UTF8 -> 1 UTF32
   381 			{
   382 				unichar = (0x1F&cur_char);
   383 				cont_bytes = 1;
   384 			}
   385 			else if ((cur_char >> 4) == 0xe)	// Three byte UTF8 -> 1 UTF32
   386 			{
   387 				unichar = (0x0F&cur_char);
   388 				cont_bytes = 2;
   389 			}
   390 			else if ((cur_char >> 3) == 0x1e)	// Four byte UTF8 -> 1 UTF32
   391 			{
   392 				unichar = (0x07&cur_char);
   393 				cont_bytes = 3;
   394 			}
   395 			else if ((cur_char >> 2) == 0x3e)	// Five byte UTF8 -> 1 UTF32
   396 			{
   397 				unichar = (0x03&cur_char);
   398 				cont_bytes = 4;
   399 			}
   400 			else if ((cur_char >> 1) == 0x7e)	// Six byte UTF8 -> 1 UTF32
   401 			{
   402 				unichar = (0x01&cur_char);
   403 				cont_bytes = 5;
   404 			}
   405 			else
   406 			{
   407 				wout += LL_UNKNOWN_CHAR;
   408 				++i;
   409 				continue;
   410 			}
   412 			// Check that this character doesn't go past the end of the string
   413 			S32 end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
   414 			do
   415 			{
   416 				++i;
   418 				cur_char = utf8str[i];
   419 				if ( (cur_char >> 6) == 0x2 )
   420 				{
   421 					unichar <<= 6;
   422 					unichar += (0x3F&cur_char);
   423 				}
   424 				else
   425 				{
   426 					// Malformed sequence - roll back to look at this as a new char
   427 					unichar = LL_UNKNOWN_CHAR;
   428 					--i;
   429 					break;
   430 				}
   431 			} while(i < end);
   433 			// Handle overlong characters and NULL characters
   434 			if ( ((cont_bytes == 1) && (unichar < 0x80))
   435 				|| ((cont_bytes == 2) && (unichar < 0x800))
   436 				|| ((cont_bytes == 3) && (unichar < 0x10000))
   437 				|| ((cont_bytes == 4) && (unichar < 0x200000))
   438 				|| ((cont_bytes == 5) && (unichar < 0x4000000)) )
   439 			{
   440 				unichar = LL_UNKNOWN_CHAR;
   441 			}
   442 		}
   444 		wout += unichar;
   445 		++i;
   446 	}
   447 	return wout;
   448 }
   450 LLWString utf8str_to_wstring(const std::string& utf8str)
   451 {
   452 	const S32 len = (S32)utf8str.length();
   453 	return utf8str_to_wstring(utf8str, len);
   454 }
   456 std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
   457 {
   458 	std::string out;
   460 	S32 i = 0;
   461 	while (i < len)
   462 	{
   463 		char tchars[8];		/* Flawfinder: ignore */
   464 		S32 n = wchar_to_utf8chars(utf32str[i], tchars);
   465 		tchars[n] = 0;
   466 		out += tchars;
   467 		i++;
   468 	}
   469 	return out;
   470 }
   472 std::string wstring_to_utf8str(const LLWString& utf32str)
   473 {
   474 	const S32 len = (S32)utf32str.length();
   475 	return wstring_to_utf8str(utf32str, len);
   476 }
   478 std::string utf16str_to_utf8str(const llutf16string& utf16str)
   479 {
   480 	return wstring_to_utf8str(utf16str_to_wstring(utf16str));
   481 }
   483 std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
   484 {
   485 	return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
   486 }
   488 std::string utf8str_trim(const std::string& utf8str)
   489 {
   490 	LLWString wstr = utf8str_to_wstring(utf8str);
   491 	LLWStringUtil::trim(wstr);
   492 	return wstring_to_utf8str(wstr);
   493 }
   496 std::string utf8str_tolower(const std::string& utf8str)
   497 {
   498 	LLWString out_str = utf8str_to_wstring(utf8str);
   499 	LLWStringUtil::toLower(out_str);
   500 	return wstring_to_utf8str(out_str);
   501 }
   504 S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
   505 {
   506 	LLWString wlhs = utf8str_to_wstring(lhs);
   507 	LLWString wrhs = utf8str_to_wstring(rhs);
   508 	return LLWStringUtil::compareInsensitive(wlhs, wrhs);
   509 }
   511 std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
   512 {
   513 	if (0 == max_len)
   514 	{
   515 		return std::string();
   516 	}
   517 	if ((S32)utf8str.length() <= max_len)
   518 	{
   519 		return utf8str;
   520 	}
   521 	else
   522 	{
   523 		S32 cur_char = max_len;
   525 		// If we're ASCII, we don't need to do anything
   526 		if ((U8)utf8str[cur_char] > 0x7f)
   527 		{
   528 			// If first two bits are (10), it's the tail end of a multibyte char.  We need to shift back
   529 			// to the first character
   530 			while (0x80 == (0xc0 & utf8str[cur_char]))
   531 			{
   532 				cur_char--;
   533 				// Keep moving forward until we hit the first char;
   534 				if (cur_char == 0)
   535 				{
   536 					// Make sure we don't trash memory if we've got a bogus string.
   537 					break;
   538 				}
   539 			}
   540 		}
   541 		// The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
   542 		return utf8str.substr(0, cur_char);
   543 	}
   544 }
   546 std::string utf8str_substChar(
   547 	const std::string& utf8str,
   548 	const llwchar target_char,
   549 	const llwchar replace_char)
   550 {
   551 	LLWString wstr = utf8str_to_wstring(utf8str);
   552 	LLWStringUtil::replaceChar(wstr, target_char, replace_char);
   553 	//wstr = wstring_substChar(wstr, target_char, replace_char);
   554 	return wstring_to_utf8str(wstr);
   555 }
   557 std::string utf8str_makeASCII(const std::string& utf8str)
   558 {
   559 	LLWString wstr = utf8str_to_wstring(utf8str);
   560 	LLWStringUtil::_makeASCII(wstr);
   561 	return wstring_to_utf8str(wstr);
   562 }
   564 std::string mbcsstring_makeASCII(const std::string& wstr)
   565 {
   566 	// Replace non-ASCII chars with replace_char
   567 	std::string out_str = wstr;
   568 	for (S32 i = 0; i < (S32)out_str.length(); i++)
   569 	{
   570 		if ((U8)out_str[i] > 0x7f)
   571 		{
   572 			out_str[i] = LL_UNKNOWN_CHAR;
   573 		}
   574 	}
   575 	return out_str;
   576 }
   577 std::string utf8str_removeCRLF(const std::string& utf8str)
   578 {
   579 	if (0 == utf8str.length())
   580 	{
   581 		return std::string();
   582 	}
   583 	const char CR = 13;
   585 	std::string out;
   586 	out.reserve(utf8str.length());
   587 	const S32 len = (S32)utf8str.length();
   588 	for( S32 i = 0; i < len; i++ )
   589 	{
   590 		if( utf8str[i] != CR )
   591 		{
   592 			out.push_back(utf8str[i]);
   593 		}
   594 	}
   595 	return out;
   596 }
   598 bool LLStringOps::isHexString(const std::string& str)
   599 {
   600 	const char* buf = str.c_str();
   601 	int len = str.size();
   602 	while (--len >= 0)
   603 	{
   604 		if (!isxdigit(buf[len])) return false;
   605 	}
   607 	return true;
   608 }
   610 #if LL_WINDOWS
   611 // documentation moved to header. Phoenix 2007-11-27
   612 namespace snprintf_hack
   613 {
   614 	int snprintf(char *str, size_t size, const char *format, ...)
   615 	{
   616 		va_list args;
   617 		va_start(args, format);
   619 		int num_written = _vsnprintf(str, size, format, args); /* Flawfinder: ignore */
   620 		va_end(args);
   622 		str[size-1] = '\0'; // always null terminate
   623 		return num_written;
   624 	}
   625 }
   627 std::string ll_convert_wide_to_string(const wchar_t* in)
   628 {
   629 	std::string out;
   630 	if(in)
   631 	{
   632 		int len_in = wcslen(in);
   633 		int len_out = WideCharToMultiByte(
   634 			CP_ACP,
   635 			0,
   636 			in,
   637 			len_in,
   638 			NULL,
   639 			0,
   640 			0,
   641 			0);
   642 		// We will need two more bytes for the double NULL ending
   643 		// created in WideCharToMultiByte().
   644 		char* pout = new char [len_out + 2];
   645 		memset(pout, 0, len_out + 2);
   646 		if(pout)
   647 		{
   648 			WideCharToMultiByte(
   649 				CP_ACP,
   650 				0,
   651 				in,
   652 				len_in,
   653 				pout,
   654 				len_out,
   655 				0,
   656 				0);
   657 			out.assign(pout);
   658 			delete[] pout;
   659 		}
   660 	}
   661 	return out;
   662 }
   663 #endif // LL_WINDOWS
   665 S32	LLStringOps::collate(const llwchar* a, const llwchar* b)
   666 { 
   667 	#if LL_WINDOWS
   668 		// in Windows, wide string functions operator on 16-bit strings, 
   669 		// not the proper 32 bit wide string
   670 		return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str());
   671 	#else
   672 		return wcscoll(a, b);
   673 	#endif
   674 }
   676 namespace LLStringFn
   677 {
   678 	// NOTE - this restricts output to ascii
   679 	void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement)
   680 	{
   681 		const char MIN = 0x20;
   682 		std::basic_string<char>::size_type len = string.size();
   683 		for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
   684 		{
   685 			if(string[ii] < MIN)
   686 			{
   687 				string[ii] = replacement;
   688 			}
   689 		}
   690 	}
   693 	// NOTE - this restricts output to ascii
   694 	void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
   695 									   char replacement)
   696 	{
   697 		const char MIN  = 0x20;
   698 		const char PIPE = 0x7c;
   699 		std::basic_string<char>::size_type len = str.size();
   700 		for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
   701 		{
   702 			if( (str[ii] < MIN) || (str[ii] == PIPE) )
   703 			{
   704 				str[ii] = replacement;
   705 			}
   706 		}
   707 	}
   709 	// https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
   710 	// allowable code points for XML. Specifically, they are:
   711 	// 0x09, 0x0a, 0x0d, and 0x20 on up.  JC
   712 	std::string strip_invalid_xml(const std::string& input)
   713 	{
   714 		std::string output;
   715 		output.reserve( input.size() );
   716 		std::string::const_iterator it = input.begin();
   717 		while (it != input.end())
   718 		{
   719 			// Must compare as unsigned for >=
   720 			// Test most likely match first
   721 			const unsigned char c = (unsigned char)*it;
   722 			if (   c >= (unsigned char)0x20   // SPACE
   723 				|| c == (unsigned char)0x09   // TAB
   724 				|| c == (unsigned char)0x0a   // LINE_FEED
   725 				|| c == (unsigned char)0x0d ) // CARRIAGE_RETURN
   726 			{
   727 				output.push_back(c);
   728 			}
   729 			++it;
   730 		}
   731 		return output;
   732 	}
   734 	/**
   735 	 * @brief Replace all control characters (c < 0x20) with replacement in
   736 	 * string.
   737 	 */
   738 	void replace_ascii_controlchars(std::basic_string<char>& string, char replacement)
   739 	{
   740 		const unsigned char MIN = 0x20;
   741 		std::basic_string<char>::size_type len = string.size();
   742 		for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
   743 		{
   744 			const unsigned char c = (unsigned char) string[ii];
   745 			if(c < MIN)
   746 			{
   747 				string[ii] = replacement;
   748 			}
   749 		}
   750 	}
   751 }
   754 ////////////////////////////////////////////////////////////
   755 // Testing
   757 #ifdef _DEBUG
   759 template<class T> 
   760 void LLStringUtilBase<T>::testHarness()
   761 {
   762 	std::string s1;
   764 	llassert( s1.c_str() == NULL );
   765 	llassert( s1.size() == 0 );
   766 	llassert( s1.empty() );
   768 	std::string s2( "hello");
   769 	llassert( !strcmp( s2.c_str(), "hello" ) );
   770 	llassert( s2.size() == 5 ); 
   771 	llassert( !s2.empty() );
   772 	std::string s3( s2 );
   774 	llassert( "hello" == s2 );
   775 	llassert( s2 == "hello" );
   776 	llassert( s2 > "gello" );
   777 	llassert( "gello" < s2 );
   778 	llassert( "gello" != s2 );
   779 	llassert( s2 != "gello" );
   781 	std::string s4 = s2;
   782 	llassert( !s4.empty() );
   783 	s4.empty();
   784 	llassert( s4.empty() );
   786 	std::string s5("");
   787 	llassert( s5.empty() );
   789 	llassert( isValidIndex(s5, 0) );
   790 	llassert( !isValidIndex(s5, 1) );
   792 	s3 = s2;
   793 	s4 = "hello again";
   795 	s4 += "!";
   796 	s4 += s4;
   797 	llassert( s4 == "hello again!hello again!" );
   800 	std::string s6 = s2 + " " + s2;
   801 	std::string s7 = s6;
   802 	llassert( s6 == s7 );
   803 	llassert( !( s6 != s7) );
   804 	llassert( !(s6 < s7) );
   805 	llassert( !(s6 > s7) );
   807 	llassert( !(s6 == "hi"));
   808 	llassert( s6 == "hello hello");
   809 	llassert( s6 < "hi");
   811 	llassert( s6[1] == 'e' );
   812 	s6[1] = 'f';
   813 	llassert( s6[1] == 'f' );
   815 	s2.erase( 4, 1 );
   816 	llassert( s2 == "hell");
   817 	s2.insert( 0, 'y' );
   818 	llassert( s2 == "yhell");
   819 	s2.erase( 1, 3 );
   820 	llassert( s2 == "yl");
   821 	s2.insert( 1, "awn, don't yel");
   822 	llassert( s2 == "yawn, don't yell");
   824 	std::string s8 = s2.substr( 6, 5 );
   825 	llassert( s8 == "don't"  );
   827 	std::string s9 = "   \t\ntest  \t\t\n  ";
   828 	trim(s9);
   829 	llassert( s9 == "test"  );
   831 	s8 = "abc123&*(ABC";
   833 	s9 = s8;
   834 	toUpper(s9);
   835 	llassert( s9 == "ABC123&*(ABC"  );
   837 	s9 = s8;
   838 	toLower(s9);
   839 	llassert( s9 == "abc123&*(abc"  );
   842 	std::string s10( 10, 'x' );
   843 	llassert( s10 == "xxxxxxxxxx" );
   845 	std::string s11( "monkey in the middle", 7, 2 );
   846 	llassert( s11 == "in" );
   848 	std::string s12;  //empty
   849 	s12 += "foo";
   850 	llassert( s12 == "foo" );
   852 	std::string s13;  //empty
   853 	s13 += 'f';
   854 	llassert( s13 == "f" );
   855 }
   858 #endif  // _DEBUG

mercurial