winamp/Src/Plugins/DSP/sc_serv3/nmrCommon/unicode/uniString.h

#pragma once
#ifndef uniString_H_
#define uniString_H_

#include "intTypes.h"
#include <stdexcept>
#include <string>
#include <vector>

namespace uniString
{
	// Exception type for bad utf8/utf16 conversions.
	// Behaves exactly like std::runtime_error; it exists so callers can catch
	// unicode conversion failures separately from other runtime errors.
	class badUnicodeData : public std::runtime_error
	{
	public:
		// description is handed to std::runtime_error and is what what() returns
		explicit badUnicodeData(const std::string &description)
			: std::runtime_error(description)
		{
		}
	};

	// leSystem is true when the target stores multi-byte integers little endian.
	// It is the default for the littleEndian / assumeLittleEndian arguments of
	// the UTF-16 conversion functions declared in this header.
	//
	// Detection order:
	//  1. __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__ - predefined by GCC and Clang,
	//     so the answer does not depend on which system headers were included.
	//  2. __BYTE_ORDER / __LITTLE_ENDIAN - the <endian.h> style macros that the
	//     original test relied on.
	//  3. Neither pair available: default to little endian. This matches the
	//     previous result in that situation, where both undefined macros
	//     evaluated to 0 in the #if and compared equal.
	#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
		#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
		static const bool leSystem = true;
		#else
		static const bool leSystem = false;
		#endif
	#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN)
		#if __BYTE_ORDER == __LITTLE_ENDIAN
		static const bool leSystem = true;
		#else
		static const bool leSystem = false;
		#endif
	#else
		static const bool leSystem = true;
	#endif

	class utf8;
	class utf16;
	class utf32;

	// Length, in elements of type S, of a zero terminated sequence.
	// The terminating zero element is not counted. A null pointer is treated
	// as an empty sequence and yields 0.
	template<typename S>
	size_t strlen(const S *s) throw()
	{
		if (s == 0)
		{
			return 0;
		}

		// walk to the terminator, then measure the distance travelled
		const S *cursor = s;
		while (*cursor)
		{
			++cursor;
		}
		return (size_t)(cursor - s);
	}

	// Bounded string length (for input that may be missing its terminator).
	// Counts elements up to the first zero element, but never examines more
	// than maxamt elements. Returns 0 for a null pointer or when maxamt is 0.
	template<typename S>
	size_t strlen(const S *s, size_t maxamt) throw()
	{
		if (s == 0)
		{
			return 0;
		}

		size_t count = 0;
		// the limit is tested first so s[maxamt] is never read
		for (; (count < maxamt) && s[count]; ++count)
		{
		}
		return count;
	}

	// True when t, converted to int, lies in the range '0'..'9',
	// i.e. it is a decimal digit character.
	template<typename T>
	bool is_a_number(T t)
	{
		const int code = (int)t;
		const int lowest = (int)'0';
		const int highest = (int)'9';

		if (code < lowest)
		{
			return false;
		}
		return (code <= highest);
	}

	// Range form: true when every element of [ibegin, iend) passes the
	// single value is_a_number() test. Stops at the first element that fails.
	// An empty range yields true.
	template <typename I>
	bool is_a_number(I ibegin,I iend)
	{
		I cursor = ibegin;
		while (cursor != iend)
		{
			if (is_a_number(*cursor))
			{
				++cursor;
				continue;
			}
			return false;
		}
		return true;
	}

	class utf16: public std::basic_string<__uint16>
	{
	public:
		typedef std::basic_string<__uint16> base_t;

		utf16(){}
		utf16(const utf16 &u16) : base_t(u16){}
		explicit utf16(const utf16::value_type *u16) : base_t(u16){}
		explicit utf16(const utf16::value_type *u16, utf16::size_type len) : base_t(u16, len){}
	#ifdef _WIN32
		explicit utf16(const std::wstring &w) : base_t(w.begin(), w.end()){}
	#endif
		explicit utf16(utf16::size_type Cnt,utf16::value_type Val) : base_t(Cnt, Val){}
		explicit utf16(const base_t &u16) : base_t(u16){}

		void assign(const utf32 &u32,bool leadingBOM = true, bool littleEndian = leSystem) throw();
		utf16 escapeXML() const throw();
	};

	// Byte string intended to hold UTF-8 encoded text, built on
	// std::basic_string<__uint8>.
	//
	// On top of the basic_string interface it offers:
	//  - construction from char based strings (bytes are copied as is, nothing
	//    is transcoded or validated by the constructors)
	//  - construction / assignment from utf32 (implemented out of line)
	//  - concatenation and comparison with const char * and std::string
	//
	// The const char * operators measure their argument with the strlen
	// template of this namespace, which treats a null pointer as an empty string.
	class utf8: public std::basic_string<__uint8>
	{
	public:
		typedef std::basic_string<__uint8> base_t;

		// empty string
		utf8(){}

		// copy
		utf8(const utf8 &u8) : base_t(u8) {}

		// zero terminated run of bytes
		utf8(const utf8::value_type *u8) : base_t(u8) {}

		// exactly len bytes starting at u8
		explicit utf8(const utf8::value_type *u8, utf8::size_type len) : base_t(u8, len) {}

		// zero terminated C string; the chars are reinterpreted as bytes
		utf8(const char *u8) : base_t((const utf8::value_type *)u8) {}

		// exactly len chars starting at u8, reinterpreted as bytes
		explicit utf8(const char *u8,utf8::size_type len) : base_t((const utf8::value_type *)u8, len) {}

		// element by element copy of a std::string
		utf8(const std::string &s) : base_t(s.begin(), s.end()) {}

		// Cnt copies of the byte Val
		explicit utf8(utf8::size_type Cnt, utf8::value_type Val) : base_t(Cnt, Val) {}

		// copy from the plain basic_string base type
		utf8(const base_t &u8) : base_t(u8) {}

		// copy of the iterator range [ibegin, iend)
		template<typename T>
		explicit utf8(T ibegin, T iend) : base_t(ibegin, iend) {}

		// build from a utf32 string via assign() below
		explicit utf8(const utf32 &u32, bool leadingBOM = false)
		{
			assign(u32, leadingBOM);
		}

		// The following are implemented out of line.
		// Note: declaring assign here hides the assign overloads of base_t.
		void assign(const utf32 &u32, bool leadingBOM = false) throw();
		bool isValid(bool allowIncompleteEndingSequence = false) const throw();
		utf8 escapeXML() const throw();
		int toInt() const throw();

		// sometimes we need to stuff this in a string. For example if we want to throw
		// a runtime_error exception with the utf8 data.
		// The bytes are copied unchanged into a std::string.
		std::string hideAsString() const throw()
		{
			std::string s(begin(), end());
			return s;
		}

		// remove code points above 0x7f
		std::string toANSI(bool allowHighBitCodePoints = false) const throw();
		// convenience function for windows
	#ifdef _WIN32
		// converts to utf16
		std::wstring toWString() const throw(badUnicodeData);
	#endif

		// returns a new string: this followed by the bytes of u8
		utf8 operator+(const utf8 &u8) const throw()
		{
			utf8 result(*this);
			result.insert(result.end(), u8.begin(), u8.end());
			return result;
		}

		// returns a new string: this followed by the chars of the C string s
		utf8 operator+(const char *s) const throw()
		{
			utf8 result(*this);
			result.insert(result.end(), s, s + strlen(s));
			return result;
		}

		// returns a new string: this followed by the chars of s
		utf8 operator+(const std::string &s) const throw()
		{
			utf8 result(*this);
			result.insert(result.end(), s.begin(), s.end());
			return result;
		}

		// generic form: returns a copy of this with v appended through operator+=
		template<typename T>
		utf8 operator+(T v) const throw()
		{
			utf8 result(*this);
			result += v;
			return result;
		}

		// generic form: forwards to whichever base_t::operator+= accepts v
		template<typename T>
		utf8& operator+=(T v) throw()
		{
			base_t::operator+=(v);
			return *this;
		}

		// Appends the chars of s in place. This performs the same insert that
		// operator+ does, but directly on this object, so no temporary copy of
		// the whole string is built and assigned back.
		utf8& operator+=(const std::string &s) throw()
		{
			insert(end(), s.begin(), s.end());
			return *this;
		}

		// Appends the chars of the C string s in place (see the std::string
		// overload above). A null s appends nothing.
		utf8& operator+=(const char *s) throw()
		{
			insert(end(), s, s + strlen(s));
			return *this;
		}

		// True when this string and the range [ibegin, iend) have the same
		// length and every pair of elements is equal after converting both
		// sides to a byte (utf8::value_type).
		template<typename ITER>
		bool equals(ITER ibegin, ITER iend) const throw()
		{
			const_iterator i1 = begin();
			ITER i2 = ibegin;
			while ((i1 != end()) && (i2 != iend))
			{
				if (((utf8::value_type)*i1) != ((utf8::value_type)*i2))
				{
					return false;
				}
				++i1;
				++i2;
			}
			// equal only if both sequences ran out at the same time
			return ((i1 == end()) && (i2 == iend));
		}

		// byte wise comparison with a C string
		bool operator==(const char *s) const throw()
		{
			return equals(s, s + strlen(s));
		}

		// byte wise comparison with a std::string
		bool operator==(const std::string &s) const throw()
		{
			return equals(s.begin(), s.end());
		}
	};

	// Concatenation with a C string on the left hand side: the result holds
	// the chars of s (taken as bytes) followed by the bytes of u8.
	inline utf8 operator+(const char *s, const utf8 &u8) throw()
	{
		utf8 joined(s);
		joined.append(u8.begin(), u8.end());
		return joined;
	}

	// Concatenation with a std::string on the left hand side: the result
	// holds the chars of s (taken as bytes) followed by the bytes of u8.
	inline utf8 operator+(const std::string &s, const utf8 &u8) throw()
	{
		utf8 joined(s);
		joined.append(u8.begin(), u8.end());
		return joined;
	}

	// String of 32 bit values (one element per code point), built on
	// std::basic_string<__uint32>.
	//
	// This class is the hub for conversions: it can be constructed / assigned
	// from byte strings and from 16 bit strings, and it can be written back out
	// with toUtf8() / toUtf16(). All conversion routines are implemented out of
	// line; the ones declared throw(badUnicodeData) report unusable input with
	// that exception.
	// Note: declaring assign here hides the assign overloads of base_t.
	class utf32: public std::basic_string<__uint32>
	{
	public:
		typedef std::basic_string<__uint32> base_t;

		// plain construction, forwarding to the matching basic_string constructor
		utf32(){}
		utf32(const utf32 &us): base_t(us){}
		utf32(const utf32::value_type *u32) : base_t(u32){}
		explicit utf32(const utf32::value_type *u32, utf32::size_type len) : base_t(u32, len){}
		explicit utf32(utf32::size_type Cnt, utf32::value_type Val) : base_t(Cnt, Val){}
		utf32(const base_t &s) : base_t(s){}
		explicit utf32(utf32::const_iterator b, utf32::const_iterator e) : base_t(b, e){}
		explicit utf32(utf32::const_reverse_iterator b, utf32::const_reverse_iterator e) : base_t(b, e){}

		// construction / assignment from byte oriented sources
		// (presumably decoded as UTF-8 - confirm against the implementation)
		explicit utf32(const __int8 *s, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const __int8 *s, size_t len, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const std::string &s, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const utf8 &u8, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const utf8::value_type *u8, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const utf8::value_type *u8, size_t len, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assignAsHighBitANSI(const utf8::value_type *u8) throw();
		void assign(const __int8 *s, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const __int8 *s, size_t len, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const std::string &s, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const utf8 &u8, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const utf8::value_type *u8, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const utf8::value_type *u8, size_t len, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);

		// use this one if the source is LATIN-1 and it may contain high bit characters
		void assignFromLatinExtended(const std::string &s) throw();
		void assignFromLatinExtended(const __uint8 *s) throw(); // null terminated
		void assignFromLatinExtended(const __uint8 *s, size_t len) throw(); // no termination
		//////////////////

		// construction / assignment from 16 bit sources; assumeLittleEndian
		// defaults to the byte order of the build target (leSystem)
		explicit utf32(const utf16 &u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const utf16::value_type *u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const utf16::value_type *u16, size_t len, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	#ifdef _WIN32
		explicit utf32(const std::wstring &w, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const wchar_t *u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		explicit utf32(const wchar_t *u16, size_t len, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	#endif
		void assign(const utf16 &u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const utf16::value_type *u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const utf16::value_type *u16, size_t len, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	#ifdef _WIN32
		void assign(const std::wstring &w, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const wchar_t *u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
		void assign(const wchar_t *u16, size_t len, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	#endif

		// conversions out of utf32
		utf8 toUtf8(bool leadingBOM = false) const throw();
		void toUtf8(utf8 &u8, bool leadingBOM = false) const throw();
		void toUtf8(std::string &s, bool leadingBOM = false) const throw();
		utf16 toUtf16(bool leadingBOM = false, bool littleEndian = leSystem) const throw();
		void toUtf16(utf16 &u16, bool leadingBOM = false, bool littleEndian = leSystem) const throw();
	#ifdef _WIN32
		void toUtf16(std::wstring &w, bool leadingBOM = false, bool littleEndian = leSystem) const throw();
	#endif

		utf32 escapeXML() const throw();

		// appends the elements of u32 to this string
		utf32& operator+=(const utf32 &u32)
		{
			insert(end(), u32.begin(), u32.end());
			return *this;
		}

		// Returns a new string: this followed by u32. The operator does not
		// modify this object, so it is const qualified and can be used with a
		// const left hand operand (as utf8::operator+ already can).
		utf32 operator+(const utf32 &u32) const
		{
			utf32 result(*this);
			result += u32;
			return result;
		}
	};

	// U+FEFF, the unicode byte order mark, as a single 16 bit code unit.
	static const utf16::value_type Utf16BOM = 0xfeff;
}

// Converts a single byte per character string into UTF-8.
//
//  - bytes 32..127 are copied unchanged
//  - control bytes 0..31 are dropped, except tab (9), line feed (10) and
//    carriage return (13) which are kept
//  - bytes 128..255 are written as the two byte UTF-8 sequence for the code
//    point with the same value
//
// The source is taken by const reference, so calling this no longer copies
// the input string.
inline uniString::utf8 asciiToUtf8(const std::string &s)
{
	uniString::utf8 result;
	// the output is usually about as long as the input; reserving up front
	// avoids most of the regrowth while appending
	result.reserve(s.size());

	for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
	{
		// go through unsigned char so the value is 0..255 regardless of
		// whether plain char is signed on this platform
		const int c = static_cast<unsigned char>(*it);

		if (c >= 128)
		{
			// two byte sequence: 110000xx 10xxxxxx
			result.push_back((__uint8)(0xC0 | (c >> 6)));
			result.push_back((__uint8)(0x80 | (c & 0x3F)));
		}
		else if (c >= 32 || c == 9 || c == 10 || c == 13)
		{
			// nothing special to do for printable 7 bit values and the
			// permitted whitespace controls
			result.push_back((__uint8)c);
		}
		// any other control byte is intentionally left out of the result
	}

	return result;
}

#endif