winamp/Src/Plugins/DSP/sc_serv3/nmrCommon/unicode/uniString.h
2024-09-24 14:54:57 +02:00

355 lines
10 KiB
C++

#pragma once
#ifndef uniString_H_
#define uniString_H_
#include "intTypes.h"
#include <stdexcept>
#include <string>
#include <vector>
namespace uniString
{
// Exception type used to report malformed UTF-8 / UTF-16 input during a
// conversion. It adds nothing to std::runtime_error except a distinct type
// that callers can catch.
class badUnicodeData : public std::runtime_error
{
public:
	// msg becomes the text returned by what().
	explicit badUnicodeData(const std::string &msg)
		: std::runtime_error(msg)
	{
	}
};
// leSystem: true when the target is little endian. It is the default value of
// the byte-order parameters on the UTF-16 conversions in this header.
//
// Detection order:
//   1. __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__, which GCC and Clang predefine,
//      so the result does not depend on which headers were included earlier.
//   2. __BYTE_ORDER / __LITTLE_ENDIAN (the <endian.h> spelling), if both exist.
//   3. Otherwise assume little endian. A bare
//      "#if __BYTE_ORDER == __LITTLE_ENDIAN" with neither macro defined
//      compares 0 == 0 and silently picks this same answer, which is wrong on
//      a big-endian target; the explicit branch keeps that historical default
//      for toolchains that offer no macro at all, without relying on the
//      accident.
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
#	if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
static const bool leSystem = true;
#	else
static const bool leSystem = false;
#	endif
#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN)
#	if __BYTE_ORDER == __LITTLE_ENDIAN
static const bool leSystem = true;
#	else
static const bool leSystem = false;
#	endif
#else
static const bool leSystem = true;
#endif
// Forward declarations. The three string classes name each other in their
// conversion members (for example utf16::assign takes a utf32), so every name
// has to be visible before the first class body.
class utf8;
class utf16;
class utf32;
// Counts the elements that precede the first zero element of s.
// Works for any element type S; a null pointer yields 0.
template<typename S>
size_t strlen(const S *s) throw()
{
	size_t count = 0;
	if (s)
	{
		for (const S *cursor = s; *cursor; ++cursor)
		{
			++count;
		}
	}
	return count;
}
// Bounded length: counts elements up to the first zero element, but never
// inspects more than maxamt elements. Use it when the terminator may be
// missing. A null pointer yields 0.
template<typename S>
size_t strlen(const S *s, size_t maxamt) throw()
{
	if (!s)
	{
		return 0;
	}
	size_t count = 0;
	// The bound is tested first so element [maxamt] is never read.
	for (; (count < maxamt) && s[count]; ++count)
	{
	}
	return count;
}
// True when t, converted to int, is the code of one of the characters
// '0' through '9'.
template<typename T>
bool is_a_number(T t)
{
	const int code = (int)t;
	if (code < (int)'0')
	{
		return false;
	}
	return code <= (int)'9';
}
// Range form: true when every element of [ibegin, iend) is a digit according
// to the single-value is_a_number. An empty range yields true.
template <typename I>
bool is_a_number(I ibegin,I iend)
{
	I cursor = ibegin;
	// Stop at the first element that is not a digit.
	while ((cursor != iend) && is_a_number(*cursor))
	{
		++cursor;
	}
	// Reaching iend means no element failed the check.
	return !(cursor != iend);
}
// UTF-16 text held as a std::basic_string of 16-bit code units (__uint16).
// All std::basic_string members remain available, except that declaring
// assign() here hides the inherited assign overloads.
class utf16: public std::basic_string<__uint16>
{
public:
	typedef std::basic_string<__uint16> base_t;

	// ---- construction ----------------------------------------------------
	// Every constructor forwards to the matching base_t constructor.
	utf16(){}
	utf16(const utf16 &u16) : base_t(u16){}
	explicit utf16(const base_t &u16) : base_t(u16){}
	// Zero-terminated array of code units.
	explicit utf16(const utf16::value_type *u16) : base_t(u16){}
	// Exactly len code units starting at u16.
	explicit utf16(const utf16::value_type *u16, utf16::size_type len) : base_t(u16, len){}
	// Cnt copies of Val.
	explicit utf16(utf16::size_type Cnt,utf16::value_type Val) : base_t(Cnt, Val){}
#ifdef _WIN32
	// Copies the elements of w one by one.
	explicit utf16(const std::wstring &w) : base_t(w.begin(), w.end()){}
#endif

	// ---- conversions (defined out of line) -------------------------------
	// Judging by the signature, replaces the contents with the UTF-16 form of
	// u32, optionally with a leading BOM and in the requested byte order --
	// TODO confirm against the implementation.
	void assign(const utf32 &u32,bool leadingBOM = true, bool littleEndian = leSystem) throw();
	// Presumably returns a copy with XML special characters escaped -- confirm.
	utf16 escapeXML() const throw();
};
class utf8: public std::basic_string<__uint8>
{
public:
typedef std::basic_string<__uint8> base_t;
utf8(){}
utf8(const utf8 &u8) : base_t(u8) {}
utf8(const utf8::value_type *u8) : base_t(u8) {}
explicit utf8(const utf8::value_type *u8, utf8::size_type len) : base_t(u8, len) {}
utf8(const char *u8) : base_t((const utf8::value_type *)u8) {}
explicit utf8(const char *u8,utf8::size_type len) : base_t((const utf8::value_type *)u8, len) {}
utf8(const std::string &s) : base_t(s.begin(), s.end()) {}
explicit utf8(utf8::size_type Cnt, utf8::value_type Val) : base_t(Cnt, Val) {}
utf8(const base_t &u8) : base_t(u8) {}
template<typename T>
explicit utf8(T ibegin, T iend) : base_t(ibegin, iend) {}
explicit utf8(const utf32 &u32, bool leadingBOM = false)
{
assign(u32, leadingBOM);
}
void assign(const utf32 &u32, bool leadingBOM = false) throw();
bool isValid(bool allowIncompleteEndingSequence = false) const throw();
utf8 escapeXML() const throw();
int toInt() const throw();
// sometimes we need to stuff this in a string. For example if we want to throw
// a runtime_error exception with the utf8 data.
std::string hideAsString() const throw()
{
std::string s(begin(), end());
return s;
}
// remove code points above 0x7f
std::string toANSI(bool allowHighBitCodePoints = false) const throw();
// convenience function for windows
#ifdef _WIN32
// converts to utf16
std::wstring toWString() const throw(badUnicodeData);
#endif
utf8 operator+(const utf8 &u8) const throw()
{
utf8 result(*this);
result.insert(result.end(), u8.begin(), u8.end());
return result;
}
utf8 operator+(const char *s) const throw()
{
utf8 result(*this);
result.insert(result.end(), s, s + strlen(s));
return result;
}
utf8 operator+(const std::string &s) const throw()
{
utf8 result(*this);
result.insert(result.end(), s.begin(), s.end());
return result;
}
template<typename T>
utf8 operator+(T v) const throw()
{
utf8 result(*this);
result += v;
return result;
}
template<typename T>
utf8& operator+=(T v) throw()
{
base_t::operator+=(v);
return *this;
}
utf8& operator+=(const std::string &s) throw()
{
utf8 result = (*this) + s;
*this = result;
return *this;
}
utf8& operator+=(const char *s) throw()
{
utf8 result = (*this) + s;
*this = result;
return *this;
}
template<typename ITER>
bool equals(ITER ibegin, ITER iend) const throw()
{
const_iterator i1 = begin();
ITER i2 = ibegin;
while ((i1 != end()) && (i2 != iend))
{
if (((utf8::value_type)*i1) != ((utf8::value_type)*i2))
{
return false;
}
++i1;
++i2;
}
return ((i1 == end()) && (i2 == iend));
}
bool operator==(const char *s) const throw()
{
return equals(s, s + strlen(s));
}
bool operator==(const std::string &s) const throw()
{
return equals(s.begin(), s.end());
}
};
// Concatenation with a C string on the left: builds a utf8 from s through the
// utf8(const char *) constructor, then appends the bytes of u8.
inline utf8 operator+(const char *s, const utf8 &u8) throw()
{
	utf8 combined(s);
	combined.insert(combined.end(), u8.begin(), u8.end());
	return combined;
}
// Concatenation with a std::string on the left: builds a utf8 from the chars
// of s, then appends the bytes of u8.
inline utf8 operator+(const std::string &s, const utf8 &u8) throw()
{
	utf8 combined(s);
	combined.insert(combined.end(), u8.begin(), u8.end());
	return combined;
}
// UTF-32 text held as a std::basic_string of 32-bit values (__uint32). The
// other two string classes convert through this one: it can be built from
// UTF-8 or UTF-16 input and written back out as either.
//
// Every conversion is defined out of line; what this header shows is only the
// signature. Members marked throw(badUnicodeData) may raise that exception,
// members marked throw() promise not to throw.
// NOTE(review): non-empty dynamic exception specifications were removed in
// C++17, so this header needs an older language mode or a tolerant compiler.
// They cannot be edited here alone because the out-of-line definitions have
// to repeat them.
// Declaring assign() here hides the inherited assign overloads.
class utf32: public std::basic_string<__uint32>
{
public:
	typedef std::basic_string<__uint32> base_t;

	// ---- construction from UTF-32 data (forwarded to base_t) -------------
	utf32(){}
	utf32(const utf32 &us): base_t(us){}
	utf32(const base_t &s) : base_t(s){}
	// Zero-terminated array of values.
	utf32(const utf32::value_type *u32) : base_t(u32){}
	// Exactly len values starting at u32.
	explicit utf32(const utf32::value_type *u32, utf32::size_type len) : base_t(u32, len){}
	// Cnt copies of Val.
	explicit utf32(utf32::size_type Cnt, utf32::value_type Val) : base_t(Cnt, Val){}
	// Copies of a forward or reverse iterator range of another utf32.
	explicit utf32(utf32::const_iterator b, utf32::const_iterator e) : base_t(b, e){}
	explicit utf32(utf32::const_reverse_iterator b, utf32::const_reverse_iterator e) : base_t(b, e){}

	// ---- construction from byte-oriented input ---------------------------
	// Presumably the input is decoded as UTF-8, and allowIncompleteEndingSequence
	// tolerates a truncated final sequence -- confirm against the implementation.
	explicit utf32(const __int8 *s, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const __int8 *s, size_t len, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const std::string &s, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const utf8 &u8, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const utf8::value_type *u8, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const utf8::value_type *u8, size_t len, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);

	// ---- assignment from byte-oriented input -----------------------------
	void assignAsHighBitANSI(const utf8::value_type *u8) throw();
	void assign(const __int8 *s, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const __int8 *s, size_t len, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const std::string &s, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const utf8 &u8, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const utf8::value_type *u8, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const utf8::value_type *u8, size_t len, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	// use this one if the source is LATIN-1 and it may contain high bit characters
	void assignFromLatinExtended(const std::string &s) throw();
	void assignFromLatinExtended(const __uint8 *s) throw(); // null terminated
	void assignFromLatinExtended(const __uint8 *s, size_t len) throw(); // no termination

	// ---- construction from UTF-16 input ----------------------------------
	// assumeLittleEndian defaults to leSystem.
	explicit utf32(const utf16 &u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const utf16::value_type *u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const utf16::value_type *u16, size_t len, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
#ifdef _WIN32
	explicit utf32(const std::wstring &w, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const wchar_t *u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	explicit utf32(const wchar_t *u16, size_t len, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
#endif

	// ---- assignment from UTF-16 input ------------------------------------
	void assign(const utf16 &u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const utf16::value_type *u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const utf16::value_type *u16, size_t len, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
#ifdef _WIN32
	void assign(const std::wstring &w, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const wchar_t *u16, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
	void assign(const wchar_t *u16, size_t len, bool assumeLittleEndian = leSystem, bool allowIncompleteEndingSequence = false) throw(badUnicodeData);
#endif

	// ---- output ----------------------------------------------------------
	// Each conversion comes in a returning form and a fill-the-argument form.
	utf8 toUtf8(bool leadingBOM = false) const throw();
	void toUtf8(utf8 &u8, bool leadingBOM = false) const throw();
	void toUtf8(std::string &s, bool leadingBOM = false) const throw();
	utf16 toUtf16(bool leadingBOM = false, bool littleEndian = leSystem) const throw();
	void toUtf16(utf16 &u16, bool leadingBOM = false, bool littleEndian = leSystem) const throw();
#ifdef _WIN32
	void toUtf16(std::wstring &w, bool leadingBOM = false, bool littleEndian = leSystem) const throw();
#endif
	// Presumably returns a copy with XML special characters escaped -- confirm.
	utf32 escapeXML() const throw();

	// ---- concatenation ---------------------------------------------------
	// Appends the values of u32 to this string.
	utf32& operator+=(const utf32 &u32)
	{
		insert(end(), u32.begin(), u32.end());
		return *this;
	}
	// Returns this string followed by u32; *this is not modified.
	// NOTE(review): the operator is not const-qualified, so it is not
	// selectable when the left operand is a const utf32.
	utf32 operator+(const utf32 &u32)
	{
		utf32 joined(*this);
		joined += u32;
		return joined;
	}
};
// Byte order mark (U+FEFF) expressed as a single UTF-16 code unit.
static const utf16::value_type Utf16BOM = 0xfeff;
}
// Converts a single-byte string to UTF-8, one input char at a time:
//   - 32..127 are copied through unchanged;
//   - 0..31 are dropped, except tab (9), line feed (10) and carriage return (13);
//   - anything else (a char with the high bit set) is treated as the code point
//     128..255 with the same value and written as a two-byte UTF-8 sequence.
// Despite the name, the last rule means high-bit input is read as if each byte
// were its own code point rather than being rejected as non-ASCII.
inline uniString::utf8 asciiToUtf8(std::string s)
{
	uniString::utf8 encoded;
	const size_t count = s.size();
	for (size_t pos = 0; pos < count; ++pos)
	{
		const int raw = s[pos];

		// High-bit char: negative where char is signed, 128..255 where it is not.
		if (raw < 0 || raw >= 128)
		{
			// Bring a negative value back into 128..255.
			const int code = (raw < 0) ? (raw + 256) : raw;
			encoded.push_back((__uint8)(code >> 6) | 0xC0);   // lead byte: 110xxxxx
			encoded.push_back((__uint8)(code & 0x3F) | 0x80); // continuation: 10xxxxxx
			continue;
		}

		// Plain 7-bit value: nothing special to do apart from filtering
		// control characters.
		const bool keptControl = (raw == 9 || raw == 10 || raw == 13);
		if (raw > 31 || keptControl)
		{
			encoded.push_back((__uint8)raw);
		}
	}
	return encoded;
}
#endif