Minetest  5.4.0
irrUString.h
Go to the documentation of this file.
1 /*
2  Basic Unicode string class for Irrlicht.
3  Copyright (c) 2009-2011 John Norman
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any
7  damages arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any
10  purpose, including commercial applications, and to alter it and
11  redistribute it freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you
14  must not claim that you wrote the original software. If you use
15  this software in a product, an acknowledgment in the product
16  documentation would be appreciated but is not required.
17 
18  2. Altered source versions must be plainly marked as such, and
19  must not be misrepresented as being the original software.
20 
21  3. This notice may not be removed or altered from any source
22  distribution.
23 
24  The original version of this class can be located at:
25  http://irrlicht.suckerfreegames.com/
26 
27  John Norman
28  john@suckerfreegames.com
29 */
30 
31 #pragma once
32 
33 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
34 # define USTRING_CPP0X
35 # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
36 # define USTRING_CPP0X_NEWLITERALS
37 # endif
38 #endif
39 
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <cstddef>
44 
// Byte-order configuration. On Windows no endian header is included; the three
// macros are defined by hand so that __BYTE_ORDER compares equal to
// __LITTLE_ENDIAN (both 0) and unequal to __BIG_ENDIAN (1). Every other
// platform pulls in a system endian header instead.
45 #ifdef _WIN32
46 #define __BYTE_ORDER 0
47 #define __LITTLE_ENDIAN 0
48 #define __BIG_ENDIAN 1
49 #elif defined(__MACH__) && defined(__APPLE__)
50 #include <machine/endian.h>
51 #elif defined(__FreeBSD__) || defined(__DragonFly__)
52 #include <sys/endian.h>
53 #else
54 #include <endian.h>
55 #endif
56 
57 #ifdef USTRING_CPP0X
58 # include <utility>
59 #endif
60 
61 #ifndef USTRING_NO_STL
62 # include <string>
63 # include <iterator>
64 # include <ostream>
65 #endif
66 
67 #include "irrTypes.h"
68 #include "irrAllocator.h"
69 #include "irrArray.h"
70 #include "irrMath.h"
71 #include "irrString.h"
72 #include "path.h"
73 
//! Base values that are OR-ed with payload bits to build the high (leading)
//! and low (trailing) code unit of a UTF-16 surrogate pair.
75 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
76 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
77
//! Classify a single UTF-16 code unit by masking its top bits:
//!  - UTF16_IS_SURROGATE:    any surrogate, 0xD800-0xDFFF
//!  - UTF16_IS_SURROGATE_HI: high surrogate, 0xD800-0xDBFF
//!  - UTF16_IS_SURROGATE_LO: low surrogate, 0xDC00-0xDFFF
79 #define UTF16_IS_SURROGATE(c) (((c) & 0xF800) == 0xD800)
80 #define UTF16_IS_SURROGATE_HI(c) (((c) & 0xFC00) == 0xD800)
81 #define UTF16_IS_SURROGATE_LO(c) (((c) & 0xFC00) == 0xDC00)
82 
83 
84 namespace irr
85 {
86 
87  // Define our character types.
88 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
89  typedef char32_t uchar32_t;
90  typedef char16_t uchar16_t;
91  typedef char uchar8_t;
92 #else
93  typedef u32 uchar32_t;
94  typedef u16 uchar16_t;
95  typedef u8 uchar8_t;
96 #endif
97 
98 namespace core
99 {
100 
101 namespace unicode
102 {
103 
105 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
106 
//! Body of the surrogate-pair decoder that the rest of this file calls as
//! unicode::toUTF32(high, low).
//! NOTE(review): the signature line is not present in this listing.
112 {
113  // Convert the surrogate pair into a single UTF-32 character.
// x = low 6 bits of `high` followed by the low 10 bits of `low` (result bits 0-15).
114  uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
// wu = bits 6-10 of `high`, plus one; placed above bit 15 of the result.
115  uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
116  return (wu << 16) | x;
117 }
118
//! Exchanges the two bytes of the 16-bit value `c`.
//! NOTE(review): signature not visible here; presumably swapEndian16 - confirm.
122 {
123  return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
124 }
125
//! Reverses the order of the four bytes of the 32-bit value `c`.
//! NOTE(review): signature not visible here; presumably the function called as
//! unicode::swapEndian32 elsewhere in this file - confirm.
129 {
130  return ((c >> 24) & 0x000000FF) |
131  ((c >> 8) & 0x0000FF00) |
132  ((c << 8) & 0x00FF0000) |
133  ((c << 24) & 0xFF000000);
134 }
135 
//! The byte-order mark as a code point value (U+FEFF).
137 const u16 BOM = 0xFEFF;
138
//! Length of the BOM counted in elements of the respective encoding
//! (three u8 for UTF-8, one 16-bit unit for UTF-16, one 32-bit unit for UTF-32).
140 const u8 BOM_UTF8_LEN = 3;
141 const u8 BOM_UTF16_LEN = 1;
142 const u8 BOM_UTF32_LEN = 1;
143
//! The BOM as raw byte sequences for each encoding and byte order.
//! Note that BOM_ENCODE_UTF32_LE begins with the same two bytes as
//! BOM_ENCODE_UTF16_LE.
145 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
146 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
147 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
148 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
149 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
150
//! Sizes in bytes of the BOM_ENCODE_* arrays above.
152 const u8 BOM_ENCODE_UTF8_LEN = 3;
153 const u8 BOM_ENCODE_UTF16_LEN = 2;
154 const u8 BOM_ENCODE_UTF32_LEN = 4;
155 
158 {
167 };
168 
171 {
174  EUTFEE_BIG
175 };
176 
179 
//! Returns the byte-order mark for the requested encoding as an array of bytes.
/** \param mode The encoding whose BOM is wanted.
	\return An array created with room for four bytes. For EUTFE_NONE nothing
	is copied into it.
	NOTE(review): the statement inside each case (presumably a COPY_ARRAY call)
	is missing from this listing.
	NOTE(review): the native-order cases test `__BIG_ENDIAN__`, while the
	constructors in this file test `__BYTE_ORDER == __BIG_ENDIAN`; confirm both
	agree on every supported platform. */
182 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
183 {
184 #define COPY_ARRAY(source, size) \
185  memcpy(ret.pointer(), source, size); \
186  ret.set_used(size)
187
// Helper macro above: copies `size` bytes into ret's storage, then marks them used.
188  core::array<u8> ret(4);
189  switch (mode)
190  {
191  case EUTFE_UTF8:
193  break;
194  case EUTFE_UTF16:
195  #ifdef __BIG_ENDIAN__
197  #else
199  #endif
200  break;
201  case EUTFE_UTF16_BE:
203  break;
204  case EUTFE_UTF16_LE:
206  break;
207  case EUTFE_UTF32:
208  #ifdef __BIG_ENDIAN__
210  #else
212  #endif
213  break;
214  case EUTFE_UTF32_BE:
216  break;
217  case EUTFE_UTF32_LE:
219  break;
220  case EUTFE_NONE:
221  // TODO sapier: fixed warning only,
222  // don't know if something needs to be done here
223  break;
224  }
225  return ret;
226
227 #undef COPY_ARRAY
228 }
229 
233 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
234 {
235  if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
236  if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
237  if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
238  if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
239  if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
240  return EUTFE_NONE;
241 }
242 
243 } // end namespace unicode
244 
245 
247 template <typename TAlloc = irrAllocator<uchar16_t> >
249 {
250 public:
251 
255 
258  {
259  public:
261 
263  operator uchar32_t() const
264  {
265  return _get();
266  }
267 
// NOTE(review): the signature lines of the operators below are missing from
// this listing; the operator named in each comment is inferred from the body.

//! Assignment from a code point `c`: writes it at this position and returns *this.
272  {
273  _set(c);
274  return *this;
275  }
276
//! Presumably prefix ++: stores the current value plus one, returns *this.
280  {
281  _set(_get() + 1);
282  return *this;
283  }
284
//! Presumably postfix ++: stores the current value plus one, returns the old value.
288  {
289  uchar32_t old = _get();
290  _set(old + 1);
291  return old;
292  }
293
//! Presumably prefix --: stores the current value minus one, returns *this.
297  {
298  _set(_get() - 1);
299  return *this;
300  }
301
//! Presumably postfix --: stores the current value minus one, returns the old value.
305  {
306  uchar32_t old = _get();
307  _set(old - 1);
308  return old;
309  }
310
//! Presumably +=: adds `val` to the stored value.
315  {
316  _set(_get() + val);
317  return *this;
318  }
319
//! Presumably -=: subtracts `val` from the stored value.
324  {
325  _set(_get() - val);
326  return *this;
327  }
328
//! Presumably *=: multiplies the stored value by `val`.
333  {
334  _set(_get() * val);
335  return *this;
336  }
337
//! Presumably /=: divides the stored value by `val` (no check for zero).
342  {
343  _set(_get() / val);
344  return *this;
345  }
346
//! Presumably %=: replaces the stored value with its remainder modulo `val`
//! (no check for zero).
351  {
352  _set(_get() % val);
353  return *this;
354  }
355 
359  uchar32_t operator+(int val) const
360  {
361  return _get() + val;
362  }
363 
367  uchar32_t operator-(int val) const
368  {
369  return _get() - val;
370  }
371 
375  uchar32_t operator*(int val) const
376  {
377  return _get() * val;
378  }
379 
383  uchar32_t operator/(int val) const
384  {
385  return _get() / val;
386  }
387 
391  uchar32_t operator%(int val) const
392  {
393  return _get() % val;
394  }
395 
396  private:
398  uchar32_t _get() const
399  {
400  const uchar16_t* a = ref->c_str();
401  if (!UTF16_IS_SURROGATE(a[pos]))
402  return static_cast<uchar32_t>(a[pos]);
403  else
404  {
405  if (pos + 1 >= ref->size_raw())
406  return 0;
407 
408  return unicode::toUTF32(a[pos], a[pos + 1]);
409  }
410  }
411 
//! Writes code point `c` at raw index `pos`, growing or shrinking the string
//! by one code unit when the old and new value need a different number of
//! UTF-16 code units.
/** The referenced string is stored as a pointer-to-const; constness is cast
	away here so the string can be modified through the proxy. */
413  void _set(uchar32_t c)
414  {
415  ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
// `a` is only read before the first modifying call below.
416  const uchar16_t* a = ref2->c_str();
417  if (c > 0xFFFF)
418  {
419  // c will be multibyte, so split it up into the high and low surrogate pairs.
420  uchar16_t x = static_cast<uchar16_t>(c);
421  uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
422  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
423
424  // If the previous position was a surrogate pair, just replace them. Else, insert the low pair.
425  if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
426  ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
427  else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
428
429  ref2->replace_raw(vh, static_cast<u32>(pos));
430  }
431  else
432  {
433  // c will be a single byte.
// (i.e. it fits in one 16-bit code unit)
434  uchar16_t vh = static_cast<uchar16_t>(c);
435
436  // If the previous position was a surrogate pair, remove the extra byte.
// NOTE(review): unlike the branch above, this does not check that pos + 1 is
// still inside the string before erasing - confirm erase_raw tolerates that.
437  if (UTF16_IS_SURROGATE_HI(a[pos]))
438  ref2->erase_raw(static_cast<u32>(pos) + 1);
439
440  ref2->replace_raw(vh, static_cast<u32>(pos));
441  }
442  }
443 
445  u32 pos;
446  };
448 
449 
451 #ifndef USTRING_NO_STL
452  class _ustring16_const_iterator : public std::iterator<
453  std::bidirectional_iterator_tag, // iterator_category
454  access, // value_type
455  ptrdiff_t, // difference_type
456  const access, // pointer
457  const access // reference
458  >
459 #else
461 #endif
462  {
463  public:
465  typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
466  typedef const access const_pointer;
467  typedef const access const_reference;
468 
469 #ifndef USTRING_NO_STL
470  typedef typename _Base::value_type value_type;
471  typedef typename _Base::difference_type difference_type;
472  typedef typename _Base::difference_type distance_type;
473  typedef typename _Base::pointer pointer;
475 #else
476  typedef access value_type;
477  typedef u32 difference_type;
478  typedef u32 distance_type;
479  typedef const_pointer pointer;
480  typedef const_reference reference;
481 #endif
482 
486  _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
487  {
488  if (ref->size_raw() == 0 || p == 0)
489  return;
490 
491  // Go to the appropriate position.
492  u32 i = p;
493  u32 sr = ref->size_raw();
494  const uchar16_t* a = ref->c_str();
495  while (i != 0 && pos < sr)
496  {
497  if (UTF16_IS_SURROGATE_HI(a[pos]))
498  pos += 2;
499  else ++pos;
500  --i;
501  }
502  }
503 
505  bool operator==(const _Iter& iter) const
506  {
507  if (ref == iter.ref && pos == iter.pos)
508  return true;
509  return false;
510  }
511 
513  bool operator!=(const _Iter& iter) const
514  {
515  if (ref != iter.ref || pos != iter.pos)
516  return true;
517  return false;
518  }
519 
// NOTE(review): the signature lines of these four operators are missing from
// this listing; the trailing comments on the opening braces identify them.

//! Prefix increment: moves to the next character, skipping both code units of
//! a surrogate pair. Does nothing at the end; never moves past size_raw().
522  { // ++iterator
523  if (pos == ref->size_raw()) return *this;
524  const uchar16_t* a = ref->c_str();
525  if (UTF16_IS_SURROGATE_HI(a[pos]))
526  pos += 2; // TODO: check for valid low surrogate?
527  else ++pos;
528  if (pos > ref->size_raw()) pos = ref->size_raw();
529  return *this;
530  }
531
//! Postfix increment: advances this iterator and returns a copy of its old state.
534  { // iterator++
535  _Iter _tmp(*this);
536  ++*this;
537  return _tmp;
538  }
539
//! Prefix decrement: moves to the previous character. When the unit stepped
//! onto is a low surrogate (and not at index 0) it steps back once more onto
//! the high surrogate. Does nothing at the start.
542  { // --iterator
543  if (pos == 0) return *this;
544  const uchar16_t* a = ref->c_str();
545  --pos;
546  if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
547  --pos;
548  return *this;
549  }
550
//! Postfix decrement: steps back and returns a copy of the old state.
553  { // iterator--
554  _Iter _tmp(*this);
555  --*this;
556  return _tmp;
557  }
558 
//! Body of operator+= (it is called by that name from operator-=): advances
//! the iterator by `v` characters, stopping at the end of the string.
//! NOTE(review): the signature line is missing from this listing.
562  {
563  if (v == 0) return *this;
// Negative steps are delegated to operator-= with the sign flipped.
564  if (v < 0) return operator-=(v * -1);
565
566  if (pos >= ref->size_raw())
567  return *this;
568
569  // Go to the appropriate position.
570  // TODO: Don't force u32 on an x64 OS. Make it agnostic.
571  u32 i = (u32)v;
572  u32 sr = ref->size_raw();
573  const uchar16_t* a = ref->c_str();
574  while (i != 0 && pos < sr)
575  {
// A high surrogate occupies two code units.
576  if (UTF16_IS_SURROGATE_HI(a[pos]))
577  pos += 2;
578  else ++pos;
579  --i;
580  }
// Stepping over a lone trailing high surrogate can overshoot by one.
581  if (pos > sr)
582  pos = sr;
583
584  return *this;
585  }
586 
590  {
591  if (v == 0) return *this;
592  if (v > 0) return operator+=(v * -1);
593 
594  if (pos == 0)
595  return *this;
596 
597  // Go to the appropriate position.
598  // TODO: Don't force u32 on an x64 OS. Make it agnostic.
599  u32 i = (u32)v;
600  const uchar16_t* a = ref->c_str();
601  while (i != 0 && pos != 0)
602  {
603  --pos;
604  if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
605  --pos;
606  --i;
607  }
608 
609  return *this;
610  }
611 
// NOTE(review): the signature lines are missing from this listing.

//! Returns a copy of this iterator moved by `v` via operator+=.
614  {
615  _Iter ret(*this);
616  ret += v;
617  return ret;
618  }
619
//! Returns a copy of this iterator moved by `v` via operator-=.
622  {
623  _Iter ret(*this);
624  ret -= v;
625  return ret;
626  }
627 
629  difference_type operator-(const _Iter& iter) const
630  {
631  // Make sure we reference the same object!
632  if (ref != iter.ref)
633  return difference_type();
634 
635  _Iter i = iter;
636  difference_type ret;
637 
638  // Walk up.
639  if (pos > i.pos)
640  {
641  while (pos > i.pos)
642  {
643  ++i;
644  ++ret;
645  }
646  return ret;
647  }
648 
649  // Walk down.
650  while (pos < i.pos)
651  {
652  --i;
653  --ret;
654  }
655  return ret;
656  }
657 
// NOTE(review): the signature lines of the four accessors below are missing
// from this listing; they look like the const/non-const overloads of
// operator* and operator->.

//! Dereference: returns an `access` proxy for the character at the current
//! position. At or past the end the proxy is built for index size_raw(),
//! moved back by one if the code unit there is a low surrogate.
660  {
661  if (pos >= ref->size_raw())
662  {
663  const uchar16_t* a = ref->c_str();
664  u32 p = ref->size_raw();
665  if (UTF16_IS_SURROGATE_LO(a[p]))
666  --p;
667  reference ret(ref, p);
668  return ret;
669  }
670  const_reference ret(ref, pos);
671  return ret;
672  }
673
//! Same logic as above, building the proxy through the `reference` type in
//! both branches.
676  {
677  if (pos >= ref->size_raw())
678  {
679  const uchar16_t* a = ref->c_str();
680  u32 p = ref->size_raw();
681  if (UTF16_IS_SURROGATE_LO(a[p]))
682  --p;
683  reference ret(ref, p);
684  return ret;
685  }
686  reference ret(ref, pos);
687  return ret;
688  }
689
//! Forwards to operator*.
692  {
693  return operator*();
694  }
695
//! Forwards to operator*.
698  {
699  return operator*();
700  }
701 
703  bool atStart() const
704  {
705  return pos == 0;
706  }
707 
709  bool atEnd() const
710  {
711  const uchar16_t* a = ref->c_str();
712  if (UTF16_IS_SURROGATE(a[pos]))
713  return (pos + 1) >= ref->size_raw();
714  else return pos >= ref->size_raw();
715  }
716 
718  void toStart()
719  {
720  pos = 0;
721  }
722 
724  void toEnd()
725  {
726  pos = ref->size_raw();
727  }
728 
731  u32 getPos() const
732  {
733  return pos;
734  }
735 
736  protected:
738  u32 pos;
739  };
740 
743  {
744  public:
749 
750 
751  typedef typename _Base::value_type value_type;
754  typedef access pointer;
755  typedef access reference;
756 
757  using _Base::pos;
758  using _Base::ref;
759 
764 
// NOTE(review): the signature lines of the four accessors below are missing
// from this listing; they look like the const/non-const overloads of
// operator* and operator-> of the mutable iterator.

//! Dereference: returns an `access` proxy for the current position. At or past
//! the end the proxy is built for index size_raw(), moved back by one if the
//! code unit there is a low surrogate.
767  {
768  if (pos >= ref->size_raw())
769  {
770  const uchar16_t* a = ref->c_str();
771  u32 p = ref->size_raw();
772  if (UTF16_IS_SURROGATE_LO(a[p]))
773  --p;
774  reference ret(ref, p);
775  return ret;
776  }
777  reference ret(ref, pos);
778  return ret;
779  }
780
//! Identical logic to the overload above.
783  {
784  if (pos >= ref->size_raw())
785  {
786  const uchar16_t* a = ref->c_str();
787  u32 p = ref->size_raw();
788  if (UTF16_IS_SURROGATE_LO(a[p]))
789  --p;
790  reference ret(ref, p);
791  return ret;
792  }
793  reference ret(ref, pos);
794  return ret;
795  }
796
//! Forwards to operator*.
799  {
800  return operator*();
801  }
802
//! Forwards to operator*.
805  {
806  return operator*();
807  }
808  };
809 
812 
816 
// NOTE(review): the signature lines and the statements inside the
// #if/#else/#endif blocks are missing from this listing (presumably they set
// the `encoding` member according to the host byte order - confirm).

//! Initializer list and body of a constructor that creates an empty string:
//! one code unit is allocated and set to the terminating zero.
819  : array(0), allocated(1), used(0)
820  {
821 #if __BYTE_ORDER == __BIG_ENDIAN
823 #else
825 #endif
826  array = allocator.allocate(1); // new u16[1];
827  array[0] = 0x0;
828  }
829
830
//! Initializer list and body of a constructor that starts with no buffer and
//! then takes its contents from `other` through operator=.
833  : array(0), allocated(0), used(0)
834  {
835 #if __BYTE_ORDER == __BIG_ENDIAN
837 #else
839 #endif
840  *this = other;
841  }
842 
843 
// NOTE(review): in all constructors below, the statements inside the
// #if/#else/#endif blocks are missing from this listing.

//! Constructs from an Irrlicht string<B, A> by assigning it through operator=.
845  template <class B, class A>
846  ustring16(const string<B, A>& other)
847  : array(0), allocated(0), used(0)
848  {
849 #if __BYTE_ORDER == __BIG_ENDIAN
851 #else
853 #endif
854  *this = other;
855  }
856
857
858 #ifndef USTRING_NO_STL
//! Constructs from a std::basic_string by assigning its c_str() through
//! operator=; conversion therefore stops at the first embedded zero, if any.
860  template <class B, class A, typename Alloc>
861  ustring16(const std::basic_string<B, A, Alloc>& other)
862  : array(0), allocated(0), used(0)
863  {
864 #if __BYTE_ORDER == __BIG_ENDIAN
866 #else
868 #endif
869  *this = other.c_str();
870  }
871
872
//! Constructs from an iterator range: reserves room for std::distance(first,
//! last) elements, then appends each element converted to uchar32_t.
874  template <typename Itr>
875  ustring16(Itr first, Itr last)
876  : array(0), allocated(0), used(0)
877  {
878 #if __BYTE_ORDER == __BIG_ENDIAN
880 #else
882 #endif
// NOTE(review): array is null until reserve() allocates; this relies on
// reserve() allocating even for an empty range - confirm against reallocate().
883  reserve(std::distance(first, last));
884  array[used] = 0;
885
886  for (; first != last; ++first)
887  append((uchar32_t)*first);
888  }
889 #endif
889 #endif
890 
891 
892 #ifndef USTRING_CPP0X_NEWLITERALS
// These two constructors exist only when USTRING_CPP0X_NEWLITERALS is not
// defined (see the enclosing #ifndef).
// NOTE(review): the statements inside the #if/#else/#endif blocks are missing
// from this listing.

//! Constructs from a zero-terminated char string by passing it, together with
//! its strlen(), to loadDataStream().
/** NOTE(review): `c` is handed to strlen() unchecked, so it must not be null. */
894  ustring16(const char* const c)
895  : array(0), allocated(0), used(0)
896  {
897 #if __BYTE_ORDER == __BIG_ENDIAN
899 #else
901 #endif
902
903  loadDataStream(c, strlen(c));
904  //append((uchar8_t*)c);
905  }
906
907
//! Constructs from `length` chars starting at `c` via loadDataStream().
909  ustring16(const char* const c, u32 length)
910  : array(0), allocated(0), used(0)
911  {
912 #if __BYTE_ORDER == __BIG_ENDIAN
914 #else
916 #endif
917
918  loadDataStream(c, length);
919  }
920 #endif
921 
922 
// NOTE(review): the statements inside the #if/#else/#endif blocks are missing
// from this listing.

//! Constructs from a zero-terminated uchar8_t string by appending it.
924  ustring16(const uchar8_t* const c)
925  : array(0), allocated(0), used(0)
926  {
927 #if __BYTE_ORDER == __BIG_ENDIAN
929 #else
931 #endif
932
933  append(c);
934  }
935
936
//! Constructs a string from the single character `c`.
/** NOTE(review): `c` is cast straight to uchar32_t; where plain char is signed,
	a negative value becomes a very large code point instead of 128-255 -
	confirm callers only pass ASCII. */
938  ustring16(const char c)
939  : array(0), allocated(0), used(0)
940  {
941 #if __BYTE_ORDER == __BIG_ENDIAN
943 #else
945 #endif
946
947  append((uchar32_t)c);
948  }
949 
950 
952  ustring16(const uchar8_t* const c, u32 length)
953  : array(0), allocated(0), used(0)
954  {
955 #if __BYTE_ORDER == __BIG_ENDIAN
957 #else
959 #endif
960 
961  append(c, length);
962  }
963 
964 
966  ustring16(const uchar16_t* const c)
967  : array(0), allocated(0), used(0)
968  {
969 #if __BYTE_ORDER == __BIG_ENDIAN
971 #else
973 #endif
974 
975  append(c);
976  }
977 
978 
980  ustring16(const uchar16_t* const c, u32 length)
981  : array(0), allocated(0), used(0)
982  {
983 #if __BYTE_ORDER == __BIG_ENDIAN
985 #else
987 #endif
988 
989  append(c, length);
990  }
991 
992 
994  ustring16(const uchar32_t* const c)
995  : array(0), allocated(0), used(0)
996  {
997 #if __BYTE_ORDER == __BIG_ENDIAN
999 #else
1001 #endif
1002 
1003  append(c);
1004  }
1005 
1006 
1008  ustring16(const uchar32_t* const c, u32 length)
1009  : array(0), allocated(0), used(0)
1010  {
1011 #if __BYTE_ORDER == __BIG_ENDIAN
1013 #else
1015 #endif
1016 
1017  append(c, length);
1018  }
1019 
1020 
// NOTE(review): the statements inside the #if/#else/#endif blocks are missing
// from this listing.

//! Constructs from a zero-terminated wchar_t string.
/** The pointer is reinterpreted as a 32-, 16- or 8-bit character string
	depending on sizeof(wchar_t) and appended through the matching overload. */
1022  ustring16(const wchar_t* const c)
1023  : array(0), allocated(0), used(0)
1024  {
1025 #if __BYTE_ORDER == __BIG_ENDIAN
1027 #else
1029 #endif
1030
1031  if (sizeof(wchar_t) == 4)
1032  append(reinterpret_cast<const uchar32_t* const>(c));
1033  else if (sizeof(wchar_t) == 2)
1034  append(reinterpret_cast<const uchar16_t* const>(c));
1035  else if (sizeof(wchar_t) == 1)
1036  append(reinterpret_cast<const uchar8_t* const>(c));
1037  }
1038
1039
//! Same as above, passing `length` on to the selected append overload.
1041  ustring16(const wchar_t* const c, u32 length)
1042  : array(0), allocated(0), used(0)
1043  {
1044 #if __BYTE_ORDER == __BIG_ENDIAN
1046 #else
1048 #endif
1049
1050  if (sizeof(wchar_t) == 4)
1051  append(reinterpret_cast<const uchar32_t* const>(c), length);
1052  else if (sizeof(wchar_t) == 2)
1053  append(reinterpret_cast<const uchar16_t* const>(c), length);
1054  else if (sizeof(wchar_t) == 1)
1055  append(reinterpret_cast<const uchar8_t* const>(c), length);
1056  }
1057 
1058 
1059 #ifdef USTRING_CPP0X
1061  ustring16(ustring16<TAlloc>&& other)
1062  : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1063  {
1064  //std::cout << "MOVE constructor" << std::endl;
1065  other.array = 0;
1066  other.allocated = 0;
1067  other.used = 0;
1068  }
1069 #endif
1070 
1071 
// NOTE(review): the signature lines are missing from this listing.

//! Destructor body: hands the buffer back to the allocator.
1074  {
1075  allocator.deallocate(array); // delete [] array;
1076  }
1077
1078
//! Copy-assignment body: makes this string a copy of `other`.
/** Self-assignment is a no-op. The buffer is replaced only when the current
	capacity cannot hold other's code units plus the terminator. */
1081  {
1082  if (this == &other)
1083  return *this;
1084
1085  used = other.size_raw();
1086  if (used >= allocated)
1087  {
1088  allocator.deallocate(array); // delete [] array;
1089  allocated = used + 1;
1090  array = allocator.allocate(used + 1); //new u16[used];
1091  }
1092
// The loop bound is inclusive, so the element at index `used` is copied too.
1093  const uchar16_t* p = other.c_str();
1094  for (u32 i=0; i<=used; ++i, ++p)
1095  array[i] = *p;
1096
1097  array[used] = 0;
1098
1099  // Validate our new UTF-16 string.
1100  validate();
1101
1102  return *this;
1103  }
1104 
1105 
1106 #ifdef USTRING_CPP0X
1109  {
1110  if (this != &other)
1111  {
1112  //std::cout << "MOVE operator=" << std::endl;
1113  allocator.deallocate(array);
1114 
1115  array = other.array;
1116  allocated = other.allocated;
1117  encoding = other.encoding;
1118  used = other.used;
1119  other.array = 0;
1120  other.used = 0;
1121  }
1122  return *this;
1123  }
1124 #endif
1125 
1126 
1128  template <class B, class A>
1129  ustring16<TAlloc>& operator=(const string<B, A>& other)
1130  {
1131  *this = other.c_str();
1132  return *this;
1133  }
1134 
1135 
// NOTE(review): the signature lines of the three operators below are missing
// from this listing (presumably operator= for the uchar8_t, uchar16_t and
// uchar32_t pointer types - confirm). All three bodies are identical.

//! Assignment from a raw character pointer `c`: resets this string to empty
//! (allocating a one-unit buffer first if there is none) and then appends `c`.
//! A null `c` leaves the string empty.
1138  {
1139  if (!array)
1140  {
1141  array = allocator.allocate(1); //new u16[1];
1142  allocated = 1;
1143  }
1144  used = 0;
1145  array[used] = 0x0;
1146  if (!c) return *this;
1147
1149  append(c);
1150  return *this;
1151  }
1152
1153
//! Same as above for the next pointer type.
1156  {
1157  if (!array)
1158  {
1159  array = allocator.allocate(1); //new u16[1];
1160  allocated = 1;
1161  }
1162  used = 0;
1163  array[used] = 0x0;
1164  if (!c) return *this;
1165
1167  append(c);
1168  return *this;
1169  }
1170
1171
//! Same as above for the next pointer type.
1174  {
1175  if (!array)
1176  {
1177  array = allocator.allocate(1); //new u16[1];
1178  allocated = 1;
1179  }
1180  used = 0;
1181  array[used] = 0x0;
1182  if (!c) return *this;
1183
1185  append(c);
1186  return *this;
1187  }
1188 
1189 
1191 
1194  ustring16<TAlloc>& operator=(const wchar_t* const c)
1195  {
1196  if (sizeof(wchar_t) == 4)
1197  *this = reinterpret_cast<const uchar32_t* const>(c);
1198  else if (sizeof(wchar_t) == 2)
1199  *this = reinterpret_cast<const uchar16_t* const>(c);
1200  else if (sizeof(wchar_t) == 1)
1201  *this = reinterpret_cast<const uchar8_t* const>(c);
1202 
1203  return *this;
1204  }
1205 
1206 
1208 
1209  template <class B>
1210  ustring16<TAlloc>& operator=(const B* const c)
1211  {
1212  if (sizeof(B) == 4)
1213  *this = reinterpret_cast<const uchar32_t* const>(c);
1214  else if (sizeof(B) == 2)
1215  *this = reinterpret_cast<const uchar16_t* const>(c);
1216  else if (sizeof(B) == 1)
1217  *this = reinterpret_cast<const uchar8_t* const>(c);
1218 
1219  return *this;
1220  }
1221 
1222 
1224  access operator [](const u32 index)
1225  {
1226  _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1227  iterator iter(*this, index);
1228  return iter.operator*();
1229  }
1230 
1231 
1233  const access operator [](const u32 index) const
1234  {
1235  _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1236  const_iterator iter(*this, index);
1237  return iter.operator*();
1238  }
1239 
1240 
1242  bool operator ==(const uchar16_t* const str) const
1243  {
1244  if (!str)
1245  return false;
1246 
1247  u32 i;
1248  for(i=0; array[i] && str[i]; ++i)
1249  if (array[i] != str[i])
1250  return false;
1251 
1252  return !array[i] && !str[i];
1253  }
1254 
1255 
1257  bool operator ==(const ustring16<TAlloc>& other) const
1258  {
1259  for(u32 i=0; array[i] && other.array[i]; ++i)
1260  if (array[i] != other.array[i])
1261  return false;
1262 
1263  return used == other.used;
1264  }
1265 
1266 
1268  bool operator <(const ustring16<TAlloc>& other) const
1269  {
1270  for(u32 i=0; array[i] && other.array[i]; ++i)
1271  {
1272  s32 diff = array[i] - other.array[i];
1273  if ( diff )
1274  return diff < 0;
1275  }
1276 
1277  return used < other.used;
1278  }
1279 
1280 
1282  bool operator !=(const uchar16_t* const str) const
1283  {
1284  return !(*this == str);
1285  }
1286 
1287 
1289  bool operator !=(const ustring16<TAlloc>& other) const
1290  {
1291  return !(*this == other);
1292  }
1293 
1294 
1297  u32 size() const
1298  {
1299  const_iterator i(*this, 0);
1300  u32 pos = 0;
1301  while (!i.atEnd())
1302  {
1303  ++i;
1304  ++pos;
1305  }
1306  return pos;
1307  }
1308 
1309 
1312  bool empty() const
1313  {
1314  return (size_raw() == 0);
1315  }
1316 
1317 
1320  const uchar16_t* c_str() const
1321  {
1322  return array;
1323  }
1324 
1325 
1330  bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1331  {
1332  u32 i;
1333  const uchar16_t* oa = other.c_str();
1334  for(i=0; i < n && array[i] && oa[i]; ++i)
1335  if (array[i] != oa[i])
1336  return false;
1337 
1338  // if one (or both) of the strings was smaller then they
1339  // are only equal if they have the same length
1340  return (i == n) || (used == other.used);
1341  }
1342 
1343 
1348  bool equalsn(const uchar16_t* const str, u32 n) const
1349  {
1350  if (!str)
1351  return false;
1352  u32 i;
1353  for(i=0; i < n && array[i] && str[i]; ++i)
1354  if (array[i] != str[i])
1355  return false;
1356 
1357  // if one (or both) of the strings was smaller then they
1358  // are only equal if they have the same length
1359  return (i == n) || (array[i] == 0 && str[i] == 0);
1360  }
1361 
1362 
//! Body of the single-character append: adds the code point `character` to the
//! end of the string and returns *this.
//! NOTE(review): the signature line is missing from this listing.
1367  {
// Make room for up to two code units plus the terminator.
1368  if (used + 2 >= allocated)
1369  reallocate(used + 2);
1370
1371  if (character > 0xFFFF)
1372  {
1373  used += 2;
1374
1375  // character will be multibyte, so split it up into a surrogate pair.
1376  uchar16_t x = static_cast<uchar16_t>(character);
1377  uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1378  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1379  array[used-2] = vh;
1380  array[used-1] = vl;
1381  }
1382  else
1383  {
// Fits in one code unit; the value is stored as is, with no surrogate-range check.
1384  ++used;
1385  array[used-1] = character;
1386  }
1387  array[used] = 0;
1388
1389  return *this;
1390  }
1391 
1392 
//! Appends a UTF-8 string, converting it to UTF-16.
/** \param other The UTF-8 bytes to append; a null pointer is ignored.
	\param length Maximum number of bytes to read; reading also stops at the
	first zero byte. A leading UTF-8 BOM is skipped and deducted from `length`.
	\return A reference to this string.
	NOTE(review): several lines are missing from this listing - the declaration
	of `c_bom` and the statement executed for each invalid or truncated
	sequence (presumably storing UTF_REPLACEMENT_CHARACTER - confirm).
	NOTE(review): when USTRING_CPP0X_NEWLITERALS is defined, uchar8_t is plain
	`char`; where that is signed, the comparisons `c2[l] == 0xC0`,
	`c2[l] == 0xC1` and `c2[l] > 0x7F` can never be true - confirm. */
1397  ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1398  {
1399  if (!other)
1400  return *this;
1401
1402  // Determine if the string is long enough for a BOM.
1403  u32 len = 0;
1404  const uchar8_t* p = other;
1405  do
1406  {
1407  ++len;
1408  } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1409
1410  // Check for BOM.
1412  if (len == unicode::BOM_ENCODE_UTF8_LEN)
1413  {
1414  if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1415  c_bom = unicode::EUTFE_UTF8;
1416  }
1417
1418  // If a BOM was found, don't include it in the string.
1419  const uchar8_t* c2 = other;
1420  if (c_bom != unicode::EUTFE_NONE)
1421  {
1422  c2 = other + unicode::BOM_UTF8_LEN;
1423  length -= unicode::BOM_UTF8_LEN;
1424  }
1425
1426  // Calculate the size of the string to read in.
1427  len = 0;
1428  p = c2;
1429  do
1430  {
1431  ++len;
1432  } while(*p++ && len < length);
1433  if (len > length)
1434  len = length;
1435
1436  // If we need to grow the array, do it now.
1437  if (used + len >= allocated)
1438  reallocate(used + (len * 2));
1439  u32 start = used;
1440
1441  // Convert UTF-8 to UTF-16.
// `l` indexes the input bytes, `pos` the output code units. `used` is bumped
// once per iteration and once more when a surrogate pair is written.
1442  u32 pos = start;
1443  for (u32 l = 0; l<len;)
1444  {
1445  ++used;
1446  if (((c2[l] >> 6) & 0x03) == 0x02)
1447  { // Invalid continuation byte.
1449  ++l;
1450  }
1451  else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1452  { // Invalid byte - overlong encoding.
1454  ++l;
1455  }
1456  else if ((c2[l] & 0xF8) == 0xF0)
1457  { // 4 bytes UTF-8, 2 bytes UTF-16.
1458  // Check for a full string.
1459  if ((l + 3) >= len)
1460  {
1462  l += 3;
1463  break;
1464  }
1465
1466  // Validate.
// Each of the three following bytes must be a continuation byte (10xxxxxx);
// l2 counts how many leading ones were.
1467  bool valid = true;
1468  u8 l2 = 0;
1469  if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1470  if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1471  if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1472  if (!valid)
1473  {
1475  l += l2;
1476  continue;
1477  }
1478
1479  // Decode.
// b1..b3 are the three bytes of the 21-bit code point, most significant first.
1480  uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1481  uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1482  uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1483  uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1484
1485  // Split v up into a surrogate pair.
1486  uchar16_t x = static_cast<uchar16_t>(v);
1487  uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1488  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1489
1490  array[pos++] = vh;
1491  array[pos++] = vl;
1492  l += 4;
1493  ++used; // Using two shorts this time, so increase used by 1.
1494  }
1495  else if ((c2[l] & 0xF0) == 0xE0)
1496  { // 3 bytes UTF-8, 1 byte UTF-16.
1497  // Check for a full string.
1498  if ((l + 2) >= len)
1499  {
1501  l += 2;
1502  break;
1503  }
1504
1505  // Validate.
1506  bool valid = true;
1507  u8 l2 = 0;
1508  if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1509  if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1510  if (!valid)
1511  {
1513  l += l2;
1514  continue;
1515  }
1516
1517  // Decode.
1518  uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1519  uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1520  uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1521  array[pos++] = ch;
1522  l += 3;
1523  }
1524  else if ((c2[l] & 0xE0) == 0xC0)
1525  { // 2 bytes UTF-8, 1 byte UTF-16.
1526  // Check for a full string.
1527  if ((l + 1) >= len)
1528  {
1530  l += 1;
1531  break;
1532  }
1533
1534  // Validate.
1535  if (((c2[l+1] >> 6) & 0x03) != 0x02)
1536  {
1538  ++l;
1539  continue;
1540  }
1541
1542  // Decode.
1543  uchar8_t b1 = (c2[l] >> 2) & 0x7;
1544  uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1545  uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1546  array[pos++] = ch;
1547  l += 2;
1548  }
1549  else
1550  { // 1 byte UTF-8, 1 byte UTF-16.
1551  // Validate.
1552  if (c2[l] > 0x7F)
1553  { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1555  }
1556  else array[pos++] = static_cast<uchar16_t>(c2[l]);
1557  ++l;
1558  }
1559  }
1560  array[used] = 0;
1561
1562  // Validate our new UTF-16 string.
1563  validate();
1564
1565  return *this;
1566  }
1567 
1568 
//! Appends a UTF-16 string.
/** \param other The UTF-16 code units to append; a null pointer is ignored.
	\param length Maximum number of code units to read; reading also stops at
	the first zero unit. A leading BOM is skipped and deducted from `length`.
	\return A reference to this string.
	NOTE(review): several lines are missing from this listing - the declaration
	of `c_end`, the first BOM comparison, the declaration of `m_end`, and the
	statement controlled by the `if` inside the copy loop (presumably a byte
	swap - confirm). */
1573  ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1574  {
1575  if (!other)
1576  return *this;
1577
1578  // Determine if the string is long enough for a BOM.
// NOTE(review): BOM_ENCODE_UTF16_LEN is a byte count (2) but is compared with a
// count of 16-bit units here, and `len` is not consulted before the memcmp below.
1579  u32 len = 0;
1580  const uchar16_t* p = other;
1581  do
1582  {
1583  ++len;
1584  } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1585
1586  // Check for the BOM to determine the string's endianness.
1589  c_end = unicode::EUTFEE_LITTLE;
1590  else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1591  c_end = unicode::EUTFEE_BIG;
1592
1593  // If a BOM was found, don't include it in the string.
1594  const uchar16_t* c2 = other;
1595  if (c_end != unicode::EUTFEE_NATIVE)
1596  {
1597  c2 = other + unicode::BOM_UTF16_LEN;
1598  length -= unicode::BOM_UTF16_LEN;
1599  }
1600
1601  // Calculate the size of the string to read in.
1602  len = 0;
1603  p = c2;
1604  do
1605  {
1606  ++len;
1607  } while(*p++ && len < length);
1608  if (len > length)
1609  len = length;
1610
1611  // If we need to grow the size of the array, do it now.
1612  if (used + len >= allocated)
1613  reallocate(used + (len * 2));
1614  u32 start = used;
1615  used += len;
1616
1617  // Copy the string now.
// NOTE(review): `l` starts at `start` and is used to index both the destination
// and the source. When this string is not empty (start != 0) the source is read
// at c2[start .. start+len) instead of c2[0 .. len) - looks like c2[l - start]
// was intended; confirm and fix.
1619  for (u32 l = start; l < start + len; ++l)
1620  {
1621  array[l] = (uchar16_t)c2[l];
1622  if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1624  }
1625
1626  array[used] = 0;
1627
1628  // Validate our new UTF-16 string.
1629  validate();
1630  return *this;
1631  }
1632 
1633 
//! Appends a UTF-32 string, converting it to UTF-16.
/** \param other The UTF-32 code points to append; a null pointer is ignored.
	\param length Maximum number of code points to read; reading also stops at
	the first zero value. A leading BOM is skipped and deducted from `length`.
	\return A reference to this string.
	NOTE(review): several lines are missing from this listing - the declaration
	of `c_end`, the first BOM comparison, the declaration of `m_end`, and the
	statement executed for values in the surrogate range (presumably storing
	UTF_REPLACEMENT_CHARACTER - confirm). */
1638  ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1639  {
1640  if (!other)
1641  return *this;
1642
1643  // Check for the BOM to determine the string's endianness.
1646  c_end = unicode::EUTFEE_LITTLE;
1647  else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1648  c_end = unicode::EUTFEE_BIG;
1649
1650  // If a BOM was found, don't include it in the string.
1651  const uchar32_t* c2 = other;
1652  if (c_end != unicode::EUTFEE_NATIVE)
1653  {
1654  c2 = other + unicode::BOM_UTF32_LEN;
1655  length -= unicode::BOM_UTF32_LEN;
1656  }
1657
1658  // Calculate the size of the string to read in.
1659  u32 len = 0;
1660  const uchar32_t* p = c2;
1661  do
1662  {
1663  ++len;
1664  } while(*p++ && len < length);
1665  if (len > length)
1666  len = length;
1667
1668  // If we need to grow the size of the array, do it now.
1669  // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1670  if (used + (len * 2) >= allocated)
1671  reallocate(used + ((len * 2) * 2));
1672  u32 start = used;
1673
1674  // Convert UTF-32 to UTF-16.
// `l` indexes the input code points, `pos` the output code units.
1676  u32 pos = start;
1677  for (u32 l = 0; l<len; ++l)
1678  {
1679  ++used;
1680
1681  uchar32_t ch = c2[l];
// Byte-swap when the BOM announced a byte order different from m_end.
1682  if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1683  ch = unicode::swapEndian32(ch);
1684
1685  if (ch > 0xFFFF)
1686  {
1687  // Split ch up into a surrogate pair as it is over 16 bits long.
1688  uchar16_t x = static_cast<uchar16_t>(ch);
1689  uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1690  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1691  array[pos++] = vh;
1692  array[pos++] = vl;
1693  ++used; // Using two shorts, so increased used again.
1694  }
1695  else if (ch >= 0xD800 && ch <= 0xDFFF)
1696  {
1697  // Between possible UTF-16 surrogates (invalid!)
1699  }
1700  else array[pos++] = static_cast<uchar16_t>(ch);
1701  }
1702  array[used] = 0;
1703
1704  // Validate our new UTF-16 string.
1705  validate();
1706
1707  return *this;
1708  }
1709 
1710 
1715  {
1716  const uchar16_t* oa = other.c_str();
1717 
1718  u32 len = other.size_raw();
1719 
1720  if (used + len >= allocated)
1721  reallocate(used + len);
1722 
1723  for (u32 l=0; l<len; ++l)
1724  array[used+l] = oa[l];
1725 
1726  used += len;
1727  array[used] = 0;
1728 
1729  return *this;
1730  }
1731 
1732 
1737  ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1738  {
1739  if (other.size() == 0)
1740  return *this;
1741 
1742  if (other.size() < length)
1743  {
1744  append(other);
1745  return *this;
1746  }
1747 
1748  if (used + length * 2 >= allocated)
1749  reallocate(used + length * 2);
1750 
1751  const_iterator iter(other, 0);
1752  u32 l = length;
1753  while (!iter.atEnd() && l)
1754  {
1755  uchar32_t c = *iter;
1756  append(c);
1757  ++iter;
1758  --l;
1759  }
1760 
1761  return *this;
1762  }
1763 
1764 
1767  void reserve(u32 count)
1768  {
1769  if (count < allocated)
1770  return;
1771 
1772  reallocate(count);
1773  }
1774 
1775 
1779  s32 findFirst(uchar32_t c) const
1780  {
1781  const_iterator i(*this, 0);
1782 
1783  s32 pos = 0;
1784  while (!i.atEnd())
1785  {
1786  uchar32_t t = *i;
1787  if (c == t)
1788  return pos;
1789  ++pos;
1790  ++i;
1791  }
1792 
1793  return -1;
1794  }
1795 
1800  s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1801  {
1802  if (!c || !count)
1803  return -1;
1804 
1805  const_iterator i(*this, 0);
1806 
1807  s32 pos = 0;
1808  while (!i.atEnd())
1809  {
1810  uchar32_t t = *i;
1811  for (u32 j=0; j<count; ++j)
1812  if (t == c[j])
1813  return pos;
1814  ++pos;
1815  ++i;
1816  }
1817 
1818  return -1;
1819  }
1820 
1821 
1826  s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1827  {
1828  if (!c || !count)
1829  return -1;
1830 
1831  const_iterator i(*this, 0);
1832 
1833  s32 pos = 0;
1834  while (!i.atEnd())
1835  {
1836  uchar32_t t = *i;
1837  u32 j;
1838  for (j=0; j<count; ++j)
1839  if (t == c[j])
1840  break;
1841 
1842  if (j==count)
1843  return pos;
1844  ++pos;
1845  ++i;
1846  }
1847 
1848  return -1;
1849  }
1850 
1855  s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
1856  {
1857  if (!c || !count)
1858  return -1;
1859 
1860  const_iterator i(end());
1861  --i;
1862 
1863  s32 pos = size() - 1;
1864  while (!i.atStart())
1865  {
1866  uchar32_t t = *i;
1867  u32 j;
1868  for (j=0; j<count; ++j)
1869  if (t == c[j])
1870  break;
1871 
1872  if (j==count)
1873  return pos;
1874  --pos;
1875  --i;
1876  }
1877 
1878  return -1;
1879  }
1880 
1885  s32 findNext(uchar32_t c, u32 startPos) const
1886  {
1887  const_iterator i(*this, startPos);
1888 
1889  s32 pos = startPos;
1890  while (!i.atEnd())
1891  {
1892  uchar32_t t = *i;
1893  if (t == c)
1894  return pos;
1895  ++pos;
1896  ++i;
1897  }
1898 
1899  return -1;
1900  }
1901 
1902 
1907  s32 findLast(uchar32_t c, s32 start = -1) const
1908  {
1909  u32 s = size();
1910  start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1911 
1912  const_iterator i(*this, start);
1913  u32 pos = start;
1914  while (!i.atStart())
1915  {
1916  uchar32_t t = *i;
1917  if (t == c)
1918  return pos;
1919  --pos;
1920  --i;
1921  }
1922 
1923  return -1;
1924  }
1925 
1930  s32 findLastChar(const uchar32_t* const c, u32 count=1) const
1931  {
1932  if (!c || !count)
1933  return -1;
1934 
1935  const_iterator i(end());
1936  --i;
1937 
1938  s32 pos = size();
1939  while (!i.atStart())
1940  {
1941  uchar32_t t = *i;
1942  for (u32 j=0; j<count; ++j)
1943  if (t == c[j])
1944  return pos;
1945  --pos;
1946  --i;
1947  }
1948 
1949  return -1;
1950  }
1951 
1952 
1957  s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1958  {
1959  u32 my_size = size();
1960  u32 their_size = str.size();
1961 
1962  if (their_size == 0 || my_size - start < their_size)
1963  return -1;
1964 
1965  const_iterator i(*this, start);
1966 
1967  s32 pos = start;
1968  while (!i.atEnd())
1969  {
1970  const_iterator i2(i);
1971  const_iterator j(str, 0);
1972  uchar32_t t1 = (uchar32_t)*i2;
1973  uchar32_t t2 = (uchar32_t)*j;
1974  while (t1 == t2)
1975  {
1976  ++i2;
1977  ++j;
1978  if (j.atEnd())
1979  return pos;
1980  t1 = (uchar32_t)*i2;
1981  t2 = (uchar32_t)*j;
1982  }
1983  ++i;
1984  ++pos;
1985  }
1986 
1987  return -1;
1988  }
1989 
1990 
1995  s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1996  {
1997  const uchar16_t* data = str.c_str();
1998  if (data && *data)
1999  {
2000  u32 len = 0;
2001 
2002  while (data[len])
2003  ++len;
2004 
2005  if (len > used)
2006  return -1;
2007 
2008  for (u32 i=start; i<=used-len; ++i)
2009  {
2010  u32 j=0;
2011 
2012  while(data[j] && array[i+j] == data[j])
2013  ++j;
2014 
2015  if (!data[j])
2016  return i;
2017  }
2018  }
2019 
2020  return -1;
2021  }
2022 
2023 
2028  ustring16<TAlloc> subString(u32 begin, s32 length) const
2029  {
2030  u32 len = size();
2031  // if start after ustring16
2032  // or no proper substring length
2033  if ((length <= 0) || (begin>=len))
2034  return ustring16<TAlloc>("");
2035  // clamp length to maximal value
2036  if ((length+begin) > len)
2037  length = len-begin;
2038 
2040  o.reserve((length+1) * 2);
2041 
2042  const_iterator i(*this, begin);
2043  while (!i.atEnd() && length)
2044  {
2045  o.append(*i);
2046  ++i;
2047  --length;
2048  }
2049 
2050  return o;
2051  }
2052 
2053 
2058  {
2059  append((uchar32_t)c);
2060  return *this;
2061  }
2062 
2063 
2068  {
2069  append(c);
2070  return *this;
2071  }
2072 
2073 
2078  {
2079  append(core::stringc(c));
2080  return *this;
2081  }
2082 
2083 
2087  ustring16<TAlloc>& operator += (unsigned short c)
2088  {
2089  append(core::stringc(c));
2090  return *this;
2091  }
2092 
2093 
2094 #ifdef USTRING_CPP0X_NEWLITERALS
2099  {
2100  append(core::stringc(c));
2101  return *this;
2102  }
2103 
2104 
2108  ustring16<TAlloc>& operator += (unsigned int c)
2109  {
2110  append(core::stringc(c));
2111  return *this;
2112  }
2113 #endif
2114 
2115 
2120  {
2121  append(core::stringc(c));
2122  return *this;
2123  }
2124 
2125 
2130  {
2131  append(core::stringc(c));
2132  return *this;
2133  }
2134 
2135 
2140  {
2141  append(core::stringc(c));
2142  return *this;
2143  }
2144 
2145 
2150  {
2151  append(c);
2152  return *this;
2153  }
2154 
2155 
2160  {
2161  append(other);
2162  return *this;
2163  }
2164 
2165 
2171  {
2172  iterator i(*this, 0);
2173  while (!i.atEnd())
2174  {
2175  typename ustring16<TAlloc>::access a = *i;
2176  if ((uchar32_t)a == toReplace)
2177  a = replaceWith;
2178  ++i;
2179  }
2180  return *this;
2181  }
2182 
2183 
2188  ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2189  {
2190  if (toReplace.size() == 0)
2191  return *this;
2192 
2193  const uchar16_t* other = toReplace.c_str();
2194  const uchar16_t* replace = replaceWith.c_str();
2195  const u32 other_size = toReplace.size_raw();
2196  const u32 replace_size = replaceWith.size_raw();
2197 
2198  // Determine the delta. The algorithm will change depending on the delta.
2199  s32 delta = replace_size - other_size;
2200 
2201  // A character for character replace. The string will not shrink or grow.
2202  if (delta == 0)
2203  {
2204  s32 pos = 0;
2205  while ((pos = find_raw(other, pos)) != -1)
2206  {
2207  for (u32 i = 0; i < replace_size; ++i)
2208  array[pos + i] = replace[i];
2209  ++pos;
2210  }
2211  return *this;
2212  }
2213 
2214  // We are going to be removing some characters. The string will shrink.
2215  if (delta < 0)
2216  {
2217  u32 i = 0;
2218  for (u32 pos = 0; pos <= used; ++i, ++pos)
2219  {
2220  // Is this potentially a match?
2221  if (array[pos] == *other)
2222  {
2223  // Check to see if we have a match.
2224  u32 j;
2225  for (j = 0; j < other_size; ++j)
2226  {
2227  if (array[pos + j] != other[j])
2228  break;
2229  }
2230 
2231  // If we have a match, replace characters.
2232  if (j == other_size)
2233  {
2234  for (j = 0; j < replace_size; ++j)
2235  array[i + j] = replace[j];
2236  i += replace_size - 1;
2237  pos += other_size - 1;
2238  continue;
2239  }
2240  }
2241 
2242  // No match found, just copy characters.
2243  array[i - 1] = array[pos];
2244  }
2245  array[i] = 0;
2246  used = i;
2247 
2248  return *this;
2249  }
2250 
2251  // We are going to be adding characters, so the string size will increase.
2252  // Count the number of times toReplace exists in the string so we can allocate the new size.
2253  u32 find_count = 0;
2254  s32 pos = 0;
2255  while ((pos = find_raw(other, pos)) != -1)
2256  {
2257  ++find_count;
2258  ++pos;
2259  }
2260 
2261  // Re-allocate the string now, if needed.
2262  u32 len = delta * find_count;
2263  if (used + len >= allocated)
2264  reallocate(used + len);
2265 
2266  // Start replacing.
2267  pos = 0;
2268  while ((pos = find_raw(other, pos)) != -1)
2269  {
2270  uchar16_t* start = array + pos + other_size - 1;
2271  uchar16_t* ptr = array + used;
2272  uchar16_t* end = array + used + delta;
2273 
2274  // Shift characters to make room for the string.
2275  while (ptr != start)
2276  {
2277  *end = *ptr;
2278  --ptr;
2279  --end;
2280  }
2281 
2282  // Add the new string now.
2283  for (u32 i = 0; i < replace_size; ++i)
2284  array[pos + i] = replace[i];
2285 
2286  pos += replace_size;
2287  used += delta;
2288  }
2289 
2290  // Terminate the string and return ourself.
2291  array[used] = 0;
2292  return *this;
2293  }
2294 
2295 
2300  {
2301  u32 pos = 0;
2302  u32 found = 0;
2303  u32 len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2304  for (u32 i=0; i<=used; ++i)
2305  {
2306  uchar32_t uc32 = 0;
2307  if (!UTF16_IS_SURROGATE_HI(array[i]))
2308  uc32 |= array[i];
2309  else if (i + 1 <= used)
2310  {
2311  // Convert the surrogate pair into a single UTF-32 character.
2312  uc32 = unicode::toUTF32(array[i], array[i + 1]);
2313  }
2314  u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2315 
2316  if (uc32 == c)
2317  {
2318  found += len;
2319  continue;
2320  }
2321 
2322  array[pos++] = array[i];
2323  if (len2 == 2)
2324  array[pos++] = array[++i];
2325  }
2326  used -= found;
2327  array[used] = 0;
2328  return *this;
2329  }
2330 
2331 
2336  {
2337  u32 size = toRemove.size_raw();
2338  if (size == 0) return *this;
2339 
2340  const uchar16_t* tra = toRemove.c_str();
2341  u32 pos = 0;
2342  u32 found = 0;
2343  for (u32 i=0; i<=used; ++i)
2344  {
2345  u32 j = 0;
2346  while (j < size)
2347  {
2348  if (array[i + j] != tra[j])
2349  break;
2350  ++j;
2351  }
2352  if (j == size)
2353  {
2354  found += size;
2355  i += size - 1;
2356  continue;
2357  }
2358 
2359  array[pos++] = array[i];
2360  }
2361  used -= found;
2362  array[used] = 0;
2363  return *this;
2364  }
2365 
2366 
2371  {
2372  if (characters.size_raw() == 0)
2373  return *this;
2374 
2375  u32 pos = 0;
2376  u32 found = 0;
2377  const_iterator iter(characters);
2378  for (u32 i=0; i<=used; ++i)
2379  {
2380  uchar32_t uc32 = 0;
2381  if (!UTF16_IS_SURROGATE_HI(array[i]))
2382  uc32 |= array[i];
2383  else if (i + 1 <= used)
2384  {
2385  // Convert the surrogate pair into a single UTF-32 character.
2386  uc32 = unicode::toUTF32(array[i], array[i+1]);
2387  }
2388  u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2389 
2390  bool cont = false;
2391  iter.toStart();
2392  while (!iter.atEnd())
2393  {
2394  uchar32_t c = *iter;
2395  if (uc32 == c)
2396  {
2397  found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2398  ++i;
2399  cont = true;
2400  break;
2401  }
2402  ++iter;
2403  }
2404  if (cont) continue;
2405 
2406  array[pos++] = array[i];
2407  if (len2 == 2)
2408  array[pos++] = array[++i];
2409  }
2410  used -= found;
2411  array[used] = 0;
2412  return *this;
2413  }
2414 
2415 
2420  ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
2421  {
2422  core::array<uchar32_t> utf32white = whitespace.toUTF32();
2423 
2424  // find start and end of the substring without the specified characters
2425  const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2426  if (begin == -1)
2427  return (*this="");
2428 
2429  const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2430 
2431  return (*this = subString(begin, (end +1) - begin));
2432  }
2433 
2434 
2440  {
2441  _IRR_DEBUG_BREAK_IF(index>used) // access violation
2442 
2443  iterator i(*this, index);
2444 
2445  uchar32_t t = *i;
2446  u32 len = (t > 0xFFFF ? 2 : 1);
2447 
2448  for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2449  array[j - len] = array[j];
2450 
2451  used -= len;
2452  array[used] = 0;
2453 
2454  return *this;
2455  }
2456 
2457 
2461  {
2462  // Validate all unicode characters.
2463  for (u32 i=0; i<allocated; ++i)
2464  {
2465  // Terminate on existing null.
2466  if (array[i] == 0)
2467  {
2468  used = i;
2469  return *this;
2470  }
2471  if (UTF16_IS_SURROGATE(array[i]))
2472  {
2473  if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2475  else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2477  ++i;
2478  }
2479  if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2481  }
2482 
2483  // terminate
2484  used = 0;
2485  if (allocated > 0)
2486  {
2487  used = allocated - 1;
2488  array[used] = 0;
2489  }
2490  return *this;
2491  }
2492 
2493 
2497  {
2498  if (used < 1)
2499  return 0;
2500 
2502  {
2503  // Make sure we have a paired surrogate.
2504  if (used < 2)
2505  return 0;
2506 
2507  // Check for an invalid surrogate.
2508  if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2509  return 0;
2510 
2511  // Convert the surrogate pair into a single UTF-32 character.
2512  return unicode::toUTF32(array[used-2], array[used-1]);
2513  }
2514  else
2515  {
2516  return array[used-1];
2517  }
2518  }
2519 
2520 
2522 
2539  template<class container>
2540  u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2541  {
2542  if (!c)
2543  return 0;
2544 
2545  const_iterator i(*this);
2546  const u32 oldSize=ret.size();
2547  u32 pos = 0;
2548  u32 lastpos = 0;
2549  u32 lastpospos = 0;
2550  bool lastWasSeparator = false;
2551  while (!i.atEnd())
2552  {
2553  uchar32_t ch = *i;
2554  bool foundSeparator = false;
2555  for (u32 j=0; j<count; ++j)
2556  {
2557  if (ch == c[j])
2558  {
2559  if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2560  !lastWasSeparator)
2561  ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2562  foundSeparator = true;
2563  lastpos = (keepSeparators ? pos : pos + 1);
2564  lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2565  break;
2566  }
2567  }
2568  lastWasSeparator = foundSeparator;
2569  ++pos;
2570  ++i;
2571  }
2572  u32 s = size() + 1;
2573  if (s > lastpos)
2574  ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2575  return ret.size()-oldSize;
2576  }
2577 
2578 
2580 
2596  template<class container>
2597  u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2598  {
2599  core::array<uchar32_t> v = c.toUTF32();
2600  return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2601  }
2602 
2603 
2606  u32 capacity() const
2607  {
2608  return allocated;
2609  }
2610 
2611 
2614  u32 size_raw() const
2615  {
2616  return used;
2617  }
2618 
2619 
2625  {
2626  u8 len = (c > 0xFFFF ? 2 : 1);
2627 
2628  if (used + len >= allocated)
2629  reallocate(used + len);
2630 
2631  used += len;
2632 
2633  iterator iter(*this, pos);
2634  for (u32 i = used - 2; i > iter.getPos(); --i)
2635  array[i] = array[i - len];
2636 
2637  if (c > 0xFFFF)
2638  {
2639  // c will be multibyte, so split it up into a surrogate pair.
2640  uchar16_t x = static_cast<uchar16_t>(c);
2641  uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2642  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2643  array[iter.getPos()] = vh;
2644  array[iter.getPos()+1] = vl;
2645  }
2646  else
2647  {
2648  array[iter.getPos()] = static_cast<uchar16_t>(c);
2649  }
2650  array[used] = 0;
2651  return *this;
2652  }
2653 
2654 
2660  {
2661  u32 len = c.size_raw();
2662  if (len == 0) return *this;
2663 
2664  if (used + len >= allocated)
2665  reallocate(used + len);
2666 
2667  used += len;
2668 
2669  iterator iter(*this, pos);
2670  for (u32 i = used - 2; i > iter.getPos() + len; --i)
2671  array[i] = array[i - len];
2672 
2673  const uchar16_t* s = c.c_str();
2674  for (u32 i = 0; i < len; ++i)
2675  {
2676  array[pos++] = *s;
2677  ++s;
2678  }
2679 
2680  array[used] = 0;
2681  return *this;
2682  }
2683 
2684 
2690  {
2691  if (used + 1 >= allocated)
2692  reallocate(used + 1);
2693 
2694  ++used;
2695 
2696  for (u32 i = used - 1; i > pos; --i)
2697  array[i] = array[i - 1];
2698 
2699  array[pos] = c;
2700  array[used] = 0;
2701  return *this;
2702  }
2703 
2704 
2709  {
2710  for (u32 i=pos; i<=used; ++i)
2711  {
2712  array[i] = array[i + 1];
2713  }
2714  --used;
2715  array[used] = 0;
2716  return *this;
2717  }
2718 
2719 
2725  {
2726  array[pos] = c;
2727  return *this;
2728  }
2729 
2730 
2734  {
2735  iterator i(*this, 0);
2736  return i;
2737  }
2738 
2739 
2743  {
2744  const_iterator i(*this, 0);
2745  return i;
2746  }
2747 
2748 
2752  {
2753  const_iterator i(*this, 0);
2754  return i;
2755  }
2756 
2757 
2761  {
2762  iterator i(*this, 0);
2763  i.toEnd();
2764  return i;
2765  }
2766 
2767 
2771  {
2772  const_iterator i(*this, 0);
2773  i.toEnd();
2774  return i;
2775  }
2776 
2777 
2781  {
2782  const_iterator i(*this, 0);
2783  i.toEnd();
2784  return i;
2785  }
2786 
2787 
2791  core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2792  {
2793  core::string<uchar8_t> ret;
2794  ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2795  const_iterator iter(*this, 0);
2796 
2797  // Add the byte order mark if the user wants it.
2798  if (addBOM)
2799  {
2800  ret.append(unicode::BOM_ENCODE_UTF8[0]);
2801  ret.append(unicode::BOM_ENCODE_UTF8[1]);
2802  ret.append(unicode::BOM_ENCODE_UTF8[2]);
2803  }
2804 
2805  while (!iter.atEnd())
2806  {
2807  uchar32_t c = *iter;
2808  if (c > 0xFFFF)
2809  { // 4 bytes
2810  uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2811  uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2812  uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2813  uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2814  ret.append(b1);
2815  ret.append(b2);
2816  ret.append(b3);
2817  ret.append(b4);
2818  }
2819  else if (c > 0x7FF)
2820  { // 3 bytes
2821  uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2822  uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2823  uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2824  ret.append(b1);
2825  ret.append(b2);
2826  ret.append(b3);
2827  }
2828  else if (c > 0x7F)
2829  { // 2 bytes
2830  uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2831  uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2832  ret.append(b1);
2833  ret.append(b2);
2834  }
2835  else
2836  { // 1 byte
2837  ret.append(static_cast<uchar8_t>(c));
2838  }
2839  ++iter;
2840  }
2841  return ret;
2842  }
2843 
2844 
2848  core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2849  {
2850  core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2851  const_iterator iter(*this, 0);
2852 
2853  // Add the byte order mark if the user wants it.
2854  if (addBOM)
2855  {
2856  ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2857  ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2858  ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2859  }
2860 
2861  while (!iter.atEnd())
2862  {
2863  uchar32_t c = *iter;
2864  if (c > 0xFFFF)
2865  { // 4 bytes
2866  uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2867  uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2868  uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2869  uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2870  ret.push_back(b1);
2871  ret.push_back(b2);
2872  ret.push_back(b3);
2873  ret.push_back(b4);
2874  }
2875  else if (c > 0x7FF)
2876  { // 3 bytes
2877  uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2878  uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2879  uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2880  ret.push_back(b1);
2881  ret.push_back(b2);
2882  ret.push_back(b3);
2883  }
2884  else if (c > 0x7F)
2885  { // 2 bytes
2886  uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2887  uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2888  ret.push_back(b1);
2889  ret.push_back(b2);
2890  }
2891  else
2892  { // 1 byte
2893  ret.push_back(static_cast<uchar8_t>(c));
2894  }
2895  ++iter;
2896  }
2897  ret.push_back(0);
2898  return ret;
2899  }
2900 
2901 
2902 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2907  core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2908  {
2909  core::string<char16_t> ret;
2910  ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2911 
2912  // Add the BOM if specified.
2913  if (addBOM)
2914  {
2915  if (endian == unicode::EUTFEE_NATIVE)
2916  ret[0] = unicode::BOM;
2917  else if (endian == unicode::EUTFEE_LITTLE)
2918  {
2919  uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(&ret[0]);
2920  *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2921  *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2922  }
2923  else
2924  {
2925  uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(&ret[0]);
2926  *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2927  *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2928  }
2929  }
2930 
2931  ret.append(array);
2932  if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2933  {
2934  char16_t* ptr = ret.c_str();
2935  for (u32 i = 0; i < ret.size(); ++i)
2936  *ptr++ = unicode::swapEndian16(*ptr);
2937  }
2938  return ret;
2939  }
2940 #endif
2941 
2942 
2948  core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2949  {
2950  core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2951  uchar16_t* ptr = ret.pointer();
2952 
2953  // Add the BOM if specified.
2954  if (addBOM)
2955  {
2956  if (endian == unicode::EUTFEE_NATIVE)
2957  *ptr = unicode::BOM;
2958  else if (endian == unicode::EUTFEE_LITTLE)
2959  {
2960  uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2961  *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2962  *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2963  }
2964  else
2965  {
2966  uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2967  *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2968  *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2969  }
2970  ++ptr;
2971  }
2972 
2973  memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2974  if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2975  {
2976  for (u32 i = 0; i <= used; ++i)
2977  ptr[i] = unicode::swapEndian16(ptr[i]);
2978  }
2979  ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2980  ret.push_back(0);
2981  return ret;
2982  }
2983 
2984 
2985 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2990  core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2991  {
2992  core::string<char32_t> ret;
2993  ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
2994  const_iterator iter(*this, 0);
2995 
2996  // Add the BOM if specified.
2997  if (addBOM)
2998  {
2999  if (endian == unicode::EUTFEE_NATIVE)
3000  ret.append(unicode::BOM);
3001  else
3002  {
3003  union
3004  {
3005  uchar32_t full;
3006  u8 chunk[4];
3007  } t;
3008 
3009  if (endian == unicode::EUTFEE_LITTLE)
3010  {
3011  t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3012  t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3013  t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3014  t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3015  }
3016  else
3017  {
3018  t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3019  t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3020  t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3021  t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3022  }
3023  ret.append(t.full);
3024  }
3025  }
3026 
3027  while (!iter.atEnd())
3028  {
3029  uchar32_t c = *iter;
3030  if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3031  c = unicode::swapEndian32(c);
3032  ret.append(c);
3033  ++iter;
3034  }
3035  return ret;
3036  }
3037 #endif
3038 
3039 
3045  core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3046  {
3047  core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3048  const_iterator iter(*this, 0);
3049 
3050  // Add the BOM if specified.
3051  if (addBOM)
3052  {
3053  if (endian == unicode::EUTFEE_NATIVE)
3054  ret.push_back(unicode::BOM);
3055  else
3056  {
3057  union
3058  {
3059  uchar32_t full;
3060  u8 chunk[4];
3061  } t;
3062 
3063  if (endian == unicode::EUTFEE_LITTLE)
3064  {
3065  t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3066  t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3067  t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3068  t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3069  }
3070  else
3071  {
3072  t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3073  t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3074  t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3075  t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3076  }
3077  ret.push_back(t.full);
3078  }
3079  }
3080  ret.push_back(0);
3081 
3082  while (!iter.atEnd())
3083  {
3084  uchar32_t c = *iter;
3085  if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3086  c = unicode::swapEndian32(c);
3087  ret.push_back(c);
3088  ++iter;
3089  }
3090  return ret;
3091  }
3092 
3093 
3095 
3100  core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3101  {
3102  if (sizeof(wchar_t) == 4)
3103  {
3104  core::array<uchar32_t> a(toUTF32(endian, addBOM));
3105  core::stringw ret(a.pointer());
3106  return ret;
3107  }
3108  else if (sizeof(wchar_t) == 2)
3109  {
3110  if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3111  {
3112  core::stringw ret(array);
3113  return ret;
3114  }
3115  else
3116  {
3117  core::array<uchar16_t> a(toUTF16(endian, addBOM));
3118  core::stringw ret(a.pointer());
3119  return ret;
3120  }
3121  }
3122  else if (sizeof(wchar_t) == 1)
3123  {
3124  core::array<uchar8_t> a(toUTF8(addBOM));
3125  core::stringw ret(a.pointer());
3126  return ret;
3127  }
3128 
3129  // Shouldn't happen.
3130  return core::stringw();
3131  }
3132 
3133 
3135 
3140  core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3141  {
3142  if (sizeof(wchar_t) == 4)
3143  {
3144  core::array<uchar32_t> a(toUTF32(endian, addBOM));
3145  core::array<wchar_t> ret(a.size());
3146  ret.set_used(a.size());
3147  memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3148  return ret;
3149  }
3150  if (sizeof(wchar_t) == 2)
3151  {
3152  if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3153  {
3154  core::array<wchar_t> ret(used);
3155  ret.set_used(used);
3156  memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3157  return ret;
3158  }
3159  else
3160  {
3161  core::array<uchar16_t> a(toUTF16(endian, addBOM));
3162  core::array<wchar_t> ret(a.size());
3163  ret.set_used(a.size());
3164  memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3165  return ret;
3166  }
3167  }
3168  if (sizeof(wchar_t) == 1)
3169  {
3170  core::array<uchar8_t> a(toUTF8(addBOM));
3171  core::array<wchar_t> ret(a.size());
3172  ret.set_used(a.size());
3173  memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3174  return ret;
3175  }
3176 
3177  // Shouldn't happen.
3178  return core::array<wchar_t>();
3179  }
3180 
3185  io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3186  {
3187 #if defined(_IRR_WCHAR_FILESYSTEM)
3188  return toWCHAR_s(endian, addBOM);
3189 #else
3190  return toUTF8_s(addBOM);
3191 #endif
3192  }
3193 
3199  ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3200  {
3201  // Clear our string.
3202  *this = "";
3203  if (!data)
3204  return *this;
3205 
3207  switch (e)
3208  {
3209  default:
3210  case unicode::EUTFE_UTF8:
3211  append((uchar8_t*)data, data_size);
3212  break;
3213 
3214  case unicode::EUTFE_UTF16:
3217  append((uchar16_t*)data, data_size / 2);
3218  break;
3219 
3220  case unicode::EUTFE_UTF32:
3223  append((uchar32_t*)data, data_size / 4);
3224  break;
3225  }
3226 
3227  return *this;
3228  }
3229 
3233  {
3234  return encoding;
3235  }
3236 
3240  {
3243  return unicode::EUTFEE_LITTLE;
3244  else return unicode::EUTFEE_BIG;
3245  }
3246 
3247 private:
3248 
3251  void reallocate(u32 new_size)
3252  {
3253  uchar16_t* old_array = array;
3254 
3255  array = allocator.allocate(new_size + 1); //new u16[new_size];
3256  allocated = new_size + 1;
3257  if (old_array == 0) return;
3258 
3259  u32 amount = used < new_size ? used : new_size;
3260  for (u32 i=0; i<=amount; ++i)
3261  array[i] = old_array[i];
3262 
3263  if (allocated <= used)
3264  used = allocated - 1;
3265 
3266  array[used] = 0;
3267 
3268  allocator.deallocate(old_array); // delete [] old_array;
3269  }
3270 
3271  //--- member variables
3272 
3276  u32 used;
3277  TAlloc allocator;
3278  //irrAllocator<uchar16_t> allocator;
3279 };
3280 
3282 
3283 
3285 template <typename TAlloc>
3287 {
3288  ustring16<TAlloc> ret(left);
3289  ret += right;
3290  return ret;
3291 }
3292 
3293 
3295 template <typename TAlloc, class B>
3296 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3297 {
3298  ustring16<TAlloc> ret(left);
3299  ret += right;
3300  return ret;
3301 }
3302 
3303 
3305 template <class B, typename TAlloc>
3306 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3307 {
3308  ustring16<TAlloc> ret(left);
3309  ret += right;
3310  return ret;
3311 }
3312 
3313 
3315 template <typename TAlloc, typename B, typename BAlloc>
3316 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3317 {
3318  ustring16<TAlloc> ret(left);
3319  ret += right;
3320  return ret;
3321 }
3322 
3323 
3325 template <typename TAlloc, typename B, typename BAlloc>
3326 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3327 {
3328  ustring16<TAlloc> ret(left);
3329  ret += right;
3330  return ret;
3331 }
3332 
3333 
3335 template <typename TAlloc, typename B, typename A, typename BAlloc>
3336 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3337 {
3338  ustring16<TAlloc> ret(left);
3339  ret += right;
3340  return ret;
3341 }
3342 
3343 
3345 template <typename TAlloc, typename B, typename A, typename BAlloc>
3346 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3347 {
3348  ustring16<TAlloc> ret(left);
3349  ret += right;
3350  return ret;
3351 }
3352 
3353 
3355 template <typename TAlloc>
3356 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3357 {
3358  ustring16<TAlloc> ret(left);
3359  ret += right;
3360  return ret;
3361 }
3362 
3363 
3365 template <typename TAlloc>
3366 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3367 {
3368  ustring16<TAlloc> ret(left);
3369  ret += right;
3370  return ret;
3371 }
3372 
3373 
3374 #ifdef USTRING_CPP0X_NEWLITERALS
3376 template <typename TAlloc>
3377 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
3378 {
3379  ustring16<TAlloc> ret(left);
3380  ret += right;
3381  return ret;
3382 }
3383 
3384 
3386 template <typename TAlloc>
3387 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
3388 {
3389  ustring16<TAlloc> ret(left);
3390  ret += right;
3391  return ret;
3392 }
3393 #endif
3394 
3395 
3397 template <typename TAlloc>
3398 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3399 {
3400  ustring16<TAlloc> ret(left);
3401  ret += core::stringc(right);
3402  return ret;
3403 }
3404 
3405 
3407 template <typename TAlloc>
3408 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3409 {
3410  ustring16<TAlloc> ret((core::stringc(left)));
3411  ret += right;
3412  return ret;
3413 }
3414 
3415 
3417 template <typename TAlloc>
3418 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3419 {
3420  ustring16<TAlloc> ret(left);
3421  ret += core::stringc(right);
3422  return ret;
3423 }
3424 
3425 
3427 template <typename TAlloc>
3428 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3429 {
3430  ustring16<TAlloc> ret((core::stringc(left)));
3431  ret += right;
3432  return ret;
3433 }
3434 
3435 
3437 template <typename TAlloc>
3438 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3439 {
3440  ustring16<TAlloc> ret(left);
3441  ret += core::stringc(right);
3442  return ret;
3443 }
3444 
3445 
3447 template <typename TAlloc>
3448 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3449 {
3450  ustring16<TAlloc> ret((core::stringc(left)));
3451  ret += right;
3452  return ret;
3453 }
3454 
3455 
3457 template <typename TAlloc>
3458 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3459 {
3460  ustring16<TAlloc> ret(left);
3461  ret += core::stringc(right);
3462  return ret;
3463 }
3464 
3465 
3467 template <typename TAlloc>
3468 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3469 {
3470  ustring16<TAlloc> ret((core::stringc(left)));
3471  ret += right;
3472  return ret;
3473 }
3474 
3475 
3477 template <typename TAlloc>
3478 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3479 {
3480  ustring16<TAlloc> ret(left);
3481  ret += core::stringc(right);
3482  return ret;
3483 }
3484 
3485 
3487 template <typename TAlloc>
3488 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3489 {
3490  ustring16<TAlloc> ret((core::stringc(left)));
3491  ret += right;
3492  return ret;
3493 }
3494 
3495 
3497 template <typename TAlloc>
3498 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3499 {
3500  ustring16<TAlloc> ret(left);
3501  ret += core::stringc(right);
3502  return ret;
3503 }
3504 
3505 
3507 template <typename TAlloc>
3508 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3509 {
3510  ustring16<TAlloc> ret((core::stringc(left)));
3511  ret += right;
3512  return ret;
3513 }
3514 
3515 
3517 template <typename TAlloc>
3518 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3519 {
3520  ustring16<TAlloc> ret(left);
3521  ret += core::stringc(right);
3522  return ret;
3523 }
3524 
3525 
3527 template <typename TAlloc>
3528 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3529 {
3530  ustring16<TAlloc> ret((core::stringc(left)));
3531  ret += right;
3532  return ret;
3533 }
3534 
3535 
3537 template <typename TAlloc>
3538 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3539 {
3540  ustring16<TAlloc> ret(left);
3541  ret += core::stringc(right);
3542  return ret;
3543 }
3544 
3545 
3547 template <typename TAlloc>
3548 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3549 {
3550  ustring16<TAlloc> ret((core::stringc(left)));
3551  ret += right;
3552  return ret;
3553 }
3554 
3555 
3556 #ifdef USTRING_CPP0X
3558 template <typename TAlloc>
3559 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3560 {
3561  //std::cout << "MOVE operator+(&, &&)" << std::endl;
3562  right.insert(left, 0);
3563  return std::move(right);
3564 }
3565 
3566 
3568 template <typename TAlloc>
3569 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3570 {
3571  //std::cout << "MOVE operator+(&&, &)" << std::endl;
3572  left.append(right);
3573  return std::move(left);
3574 }
3575 
3576 
3578 template <typename TAlloc>
3579 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3580 {
3581  //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3582  if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3583  (right.capacity() - right.size_raw() < left.size_raw()))
3584  {
3585  left.append(right);
3586  return std::move(left);
3587  }
3588  else
3589  {
3590  right.insert(left, 0);
3591  return std::move(right);
3592  }
3593 }
3594 
3595 
3597 template <typename TAlloc, class B>
3598 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3599 {
3600  //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3601  left.append(right);
3602  return std::move(left);
3603 }
3604 
3605 
3607 template <class B, typename TAlloc>
3608 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3609 {
3610  //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3611  right.insert(left, 0);
3612  return std::move(right);
3613 }
3614 
3615 
3617 template <typename TAlloc, typename B, typename BAlloc>
3618 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3619 {
3620  //std::cout << "MOVE operator+(&, &&)" << std::endl;
3621  right.insert(left, 0);
3622  return std::move(right);
3623 }
3624 
3625 
3627 template <typename TAlloc, typename B, typename BAlloc>
3628 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3629 {
3630  //std::cout << "MOVE operator+(&&, &)" << std::endl;
3631  left.append(right);
3632  return std::move(left);
3633 }
3634 
3635 
3637 template <typename TAlloc, typename B, typename A, typename BAlloc>
3638 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3639 {
3640  //std::cout << "MOVE operator+(&, &&)" << std::endl;
3641  right.insert(core::ustring16<TAlloc>(left), 0);
3642  return std::move(right);
3643 }
3644 
3645 
3647 template <typename TAlloc, typename B, typename A, typename BAlloc>
3648 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3649 {
3650  //std::cout << "MOVE operator+(&&, &)" << std::endl;
3651  left.append(right);
3652  return std::move(left);
3653 }
3654 
3655 
3657 template <typename TAlloc>
3658 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3659 {
3660  left.append((uchar32_t)right);
3661  return std::move(left);
3662 }
3663 
3664 
3666 template <typename TAlloc>
3667 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3668 {
3669  right.insert((uchar32_t)left, 0);
3670  return std::move(right);
3671 }
3672 
3673 
3674 #ifdef USTRING_CPP0X_NEWLITERALS
3676 template <typename TAlloc>
3677 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3678 {
3679  left.append(right);
3680  return std::move(left);
3681 }
3682 
3683 
3685 template <typename TAlloc>
3686 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
3687 {
3688  right.insert(left, 0);
3689  return std::move(right);
3690 }
3691 #endif
3692 
3693 
3695 template <typename TAlloc>
3696 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3697 {
3698  left.append(core::stringc(right));
3699  return std::move(left);
3700 }
3701 
3702 
3704 template <typename TAlloc>
3705 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3706 {
3707  right.insert(core::stringc(left), 0);
3708  return std::move(right);
3709 }
3710 
3711 
3713 template <typename TAlloc>
3714 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3715 {
3716  left.append(core::stringc(right));
3717  return std::move(left);
3718 }
3719 
3720 
3722 template <typename TAlloc>
3723 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3724 {
3725  right.insert(core::stringc(left), 0);
3726  return std::move(right);
3727 }
3728 
3729 
3731 template <typename TAlloc>
3732 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3733 {
3734  left.append(core::stringc(right));
3735  return std::move(left);
3736 }
3737 
3738 
3740 template <typename TAlloc>
3741 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3742 {
3743  right.insert(core::stringc(left), 0);
3744  return std::move(right);
3745 }
3746 
3747 
3749 template <typename TAlloc>
3750 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3751 {
3752  left.append(core::stringc(right));
3753  return std::move(left);
3754 }
3755 
3756 
3758 template <typename TAlloc>
3759 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3760 {
3761  right.insert(core::stringc(left), 0);
3762  return std::move(right);
3763 }
3764 
3765 
3767 template <typename TAlloc>
3768 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3769 {
3770  left.append(core::stringc(right));
3771  return std::move(left);
3772 }
3773 
3774 
3776 template <typename TAlloc>
3777 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3778 {
3779  right.insert(core::stringc(left), 0);
3780  return std::move(right);
3781 }
3782 
3783 
3785 template <typename TAlloc>
3786 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3787 {
3788  left.append(core::stringc(right));
3789  return std::move(left);
3790 }
3791 
3792 
3794 template <typename TAlloc>
3795 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3796 {
3797  right.insert(core::stringc(left), 0);
3798  return std::move(right);
3799 }
3800 
3801 
3803 template <typename TAlloc>
3804 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3805 {
3806  left.append(core::stringc(right));
3807  return std::move(left);
3808 }
3809 
3810 
3812 template <typename TAlloc>
3813 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3814 {
3815  right.insert(core::stringc(left), 0);
3816  return std::move(right);
3817 }
3818 
3819 
3821 template <typename TAlloc>
3822 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3823 {
3824  left.append(core::stringc(right));
3825  return std::move(left);
3826 }
3827 
3828 
3830 template <typename TAlloc>
3831 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3832 {
3833  right.insert(core::stringc(left), 0);
3834  return std::move(right);
3835 }
3836 #endif
3837 
3838 
3839 #ifndef USTRING_NO_STL
3841 template <typename TAlloc>
3842 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3843 {
3844  out << in.toUTF8_s().c_str();
3845  return out;
3846 }
3847 
3849 template <typename TAlloc>
3850 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3851 {
3852  out << in.toWCHAR_s().c_str();
3853  return out;
3854 }
3855 #endif
3856 
3857 
3858 #ifndef USTRING_NO_STL
3859 
3860 namespace unicode
3861 {
3862 
3865 class hash : public std::unary_function<core::ustring, size_t>
3866 {
3867  public:
3868  size_t operator()(const core::ustring& s) const
3869  {
3870  size_t ret = 2166136261U;
3871  size_t index = 0;
3872  size_t stride = 1 + s.size_raw() / 10;
3873 
3875  while (i != s.end())
3876  {
3877  // TODO: Don't force u32 on an x64 OS. Make it agnostic.
3878  ret = 16777619U * ret ^ (size_t)s[(u32)index];
3879  index += stride;
3880  i += stride;
3881  }
3882  return (ret);
3883  }
3884 };
3885 
3886 } // end namespace unicode
3887 
3888 #endif
3889 
3890 } // end namespace core
3891 } // end namespace irr
Hashing algorithm for hashing a ustring.
Definition: irrUString.h:3866
size_t operator()(const core::ustring &s) const
Definition: irrUString.h:3868
Iterator to iterate through a UTF-16 string.
Definition: irrUString.h:462
_ustring16_const_iterator(const ustring16< TAlloc > &s, const u32 p)
Definition: irrUString.h:486
bool atStart() const
Is the iterator at the start of the string?
Definition: irrUString.h:703
const access const_reference
Definition: irrUString.h:467
_ustring16_const_iterator _Iter
Definition: irrUString.h:464
pointer operator->()
Accesses the full character at the iterator's position.
Definition: irrUString.h:697
const ustring16< TAlloc > * ref
Definition: irrUString.h:737
void toEnd()
Moves the iterator to the end of the string.
Definition: irrUString.h:724
_Iter operator-(const difference_type v) const
Return a new iterator that is a variable number of full characters backward from the current position...
Definition: irrUString.h:621
difference_type operator-(const _Iter &iter) const
Returns the distance between two iterators.
Definition: irrUString.h:629
bool operator==(const _Iter &iter) const
Test for equality.
Definition: irrUString.h:505
_Iter operator--(int)
Switch to the previous full character in the string, returning the previous position.
Definition: irrUString.h:552
_Base::pointer pointer
Definition: irrUString.h:473
_Iter & operator+=(const difference_type v)
Advance a specified number of full characters in the string.
Definition: irrUString.h:561
bool atEnd() const
Is the iterator at the end of the string?
Definition: irrUString.h:709
_Iter & operator--()
Switch to the previous full character in the string.
Definition: irrUString.h:541
void toStart()
Moves the iterator to the start of the string.
Definition: irrUString.h:718
_Iter & operator++()
Switch to the next full character in the string.
Definition: irrUString.h:521
_Iter operator+(const difference_type v) const
Return a new iterator that is a variable number of full characters forward from the current position.
Definition: irrUString.h:613
std::iterator< std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access > _Base
Definition: irrUString.h:465
const_reference operator*() const
Accesses the full character at the iterator's position.
Definition: irrUString.h:659
_Base::difference_type difference_type
Definition: irrUString.h:471
_Base::difference_type distance_type
Definition: irrUString.h:472
u32 pos
Definition: irrUString.h:738
const_reference reference
Definition: irrUString.h:474
_ustring16_const_iterator(const _Iter &i)
Constructors.
Definition: irrUString.h:484
_Iter operator++(int)
Switch to the next full character in the string, returning the previous position.
Definition: irrUString.h:533
bool operator!=(const _Iter &iter) const
Test for inequality.
Definition: irrUString.h:513
const_pointer operator->() const
Accesses the full character at the iterator's position.
Definition: irrUString.h:691
reference operator*()
Accesses the full character at the iterator's position.
Definition: irrUString.h:675
_ustring16_const_iterator(const ustring16< TAlloc > &s)
Definition: irrUString.h:485
u32 getPos() const
Returns the iterator's position.
Definition: irrUString.h:731
_Base::value_type value_type
Definition: irrUString.h:470
_Iter & operator-=(const difference_type v)
Go back a specified number of full characters in the string.
Definition: irrUString.h:589
const access const_pointer
Definition: irrUString.h:466
Iterator classes.
Definition: irrUString.h:258
_ustring16_iterator_access & operator-=(int val)
Subtracts from the value by a specified amount.
Definition: irrUString.h:323
_ustring16_iterator_access & operator/=(int val)
Divides the value by a specified amount.
Definition: irrUString.h:341
void _set(uchar32_t c)
Sets a uchar32_t at our current position.
Definition: irrUString.h:413
uchar32_t operator+(int val) const
Adds to the value by a specified amount.
Definition: irrUString.h:359
_ustring16_iterator_access & operator%=(int val)
Takes the value modulo a specified amount.
Definition: irrUString.h:350
uchar32_t operator++(int)
Increments the value by 1, returning the old value.
Definition: irrUString.h:287
_ustring16_iterator_access(const ustring16< TAlloc > *s, u32 p)
Definition: irrUString.h:260
uchar32_t operator--(int)
Decrements the value by 1, returning the old value.
Definition: irrUString.h:304
uchar32_t _get() const
Gets a uchar32_t from our current position.
Definition: irrUString.h:398
uchar32_t operator*(int val) const
Multiplies the value by a specified amount.
Definition: irrUString.h:375
_ustring16_iterator_access & operator--()
Decrements the value by 1.
Definition: irrUString.h:296
_ustring16_iterator_access & operator*=(int val)
Multiplies the value by a specified amount.
Definition: irrUString.h:332
_ustring16_iterator_access & operator+=(int val)
Adds to the value by a specified amount.
Definition: irrUString.h:314
const ustring16< TAlloc > * ref
Definition: irrUString.h:444
uchar32_t operator/(int val) const
Divides the value by a specified amount.
Definition: irrUString.h:383
uchar32_t operator-(int val) const
Subtracts from the value by a specified amount.
Definition: irrUString.h:367
uchar32_t operator%(int val) const
Takes the value modulo a specified amount.
Definition: irrUString.h:391
_ustring16_iterator_access & operator=(const uchar32_t c)
Allow one to change the character in the unicode string.
Definition: irrUString.h:271
_ustring16_iterator_access & operator++()
Increments the value by 1.
Definition: irrUString.h:279
Iterator to iterate through a UTF-16 string.
Definition: irrUString.h:743
_ustring16_iterator(const ustring16< TAlloc > &s, const u32 p)
Definition: irrUString.h:763
_ustring16_iterator(const _Iter &i)
Constructors.
Definition: irrUString.h:761
access reference
Definition: irrUString.h:755
_Base::difference_type difference_type
Definition: irrUString.h:752
_Base::distance_type distance_type
Definition: irrUString.h:753
_Base::const_pointer const_pointer
Definition: irrUString.h:747
reference operator*()
Accesses the full character at the iterator's position.
Definition: irrUString.h:782
access pointer
Definition: irrUString.h:754
pointer operator->()
Accesses the full character at the iterator's position.
Definition: irrUString.h:804
_ustring16_iterator _Iter
Definition: irrUString.h:745
_ustring16_const_iterator _Base
Definition: irrUString.h:746
_Base::value_type value_type
Definition: irrUString.h:751
_ustring16_iterator(const ustring16< TAlloc > &s)
Definition: irrUString.h:762
_Base::const_reference const_reference
Definition: irrUString.h:748
reference operator*() const
Accesses the full character at the iterator's position.
Definition: irrUString.h:766
pointer operator->() const
Accesses the full character at the iterator's position.
Definition: irrUString.h:798
UTF-16 string class.
Definition: irrUString.h:249
ustring16< TAlloc > & erase_raw(u32 pos)
Removes a character from string.
Definition: irrUString.h:2708
ustring16< TAlloc > & removeChars(const ustring16< TAlloc > &characters)
Removes characters from the ustring16.
Definition: irrUString.h:2370
const unicode::EUTF_ENCODE getEncoding() const
Gets the encoding of the Unicode string this class contains.
Definition: irrUString.h:3232
ustring16< TAlloc > & operator=(const B *const c)
Assignment operator for other strings.
Definition: irrUString.h:1210
ustring16< TAlloc > & insert_raw(uchar16_t c, u32 pos)
Inserts a character into the string.
Definition: irrUString.h:2689
ustring16< TAlloc >::_ustring16_iterator_access access
Definition: irrUString.h:447
ustring16< TAlloc > & loadDataStream(const char *data, size_t data_size)
Loads an unknown stream of data.
Definition: irrUString.h:3199
ustring16(const uchar8_t *const c, u32 length)
Constructor for copying a UTF-8 string from a pointer with a given length.
Definition: irrUString.h:952
ustring16(const uchar16_t *const c)
Constructor for copying a UTF-16 string from a pointer.
Definition: irrUString.h:966
bool equalsn(const ustring16< TAlloc > &other, u32 n) const
Compares the first n characters of this string with another.
Definition: irrUString.h:1330
ustring16< TAlloc > & remove(const ustring16< TAlloc > &toRemove)
Removes a ustring16 from the ustring16.
Definition: irrUString.h:2335
ustring16(const uchar32_t *const c, u32 length)
Constructor for copying a UTF-32 from a pointer with a given length.
Definition: irrUString.h:1008
ustring16< TAlloc > & operator=(const uchar16_t *const c)
Assignment operator for UTF-16 strings.
Definition: irrUString.h:1155
uchar16_t * array
Definition: irrUString.h:3273
ustring16< TAlloc > & replace(uchar32_t toReplace, uchar32_t replaceWith)
Replaces all characters of a given type with another one.
Definition: irrUString.h:2170
void reallocate(u32 new_size)
Reallocate the string, making it bigger or smaller.
Definition: irrUString.h:3251
u32 size() const
Returns the length of a ustring16 in full characters.
Definition: irrUString.h:1297
bool operator==(const uchar16_t *const str) const
Equality operator.
Definition: irrUString.h:1242
ustring16< TAlloc >::_ustring16_const_iterator const_iterator
Definition: irrUString.h:811
ustring16< TAlloc > & remove(uchar32_t c)
Removes characters from a ustring16.
Definition: irrUString.h:2299
ustring16< TAlloc > & append(const uchar32_t *const other, u32 length=0xffffffff)
Appends a UTF-32 string to this ustring16.
Definition: irrUString.h:1638
ustring16< TAlloc > & insert(const ustring16< TAlloc > &c, u32 pos)
Inserts a string into the string.
Definition: irrUString.h:2659
bool operator<(const ustring16< TAlloc > &other) const
Is smaller comparator.
Definition: irrUString.h:1268
u32 capacity() const
Gets the size of the allocated memory buffer for the string.
Definition: irrUString.h:2606
ustring16(const string< B, A > &other)
Constructor from other string types.
Definition: irrUString.h:846
ustring16(Itr first, Itr last)
Constructor from iterator.
Definition: irrUString.h:875
s32 findFirstChar(const uchar32_t *const c, u32 count=1) const
Finds first occurrence of a character of a list.
Definition: irrUString.h:1800
ustring16< TAlloc > & validate()
Validate the existing ustring16, checking for valid surrogate pairs and checking for proper terminati...
Definition: irrUString.h:2460
ustring16< TAlloc > subString(u32 begin, s32 length) const
Returns a substring.
Definition: irrUString.h:2028
s32 find(const ustring16< TAlloc > &str, const u32 start=0) const
Finds another ustring16 in this ustring16.
Definition: irrUString.h:1957
core::array< uchar8_t > toUTF8(const bool addBOM=false) const
Converts the string to a UTF-8 encoded string array.
Definition: irrUString.h:2848
u32 used
Definition: irrUString.h:3276
s32 findLastChar(const uchar32_t *const c, u32 count=1) const
Finds last occurrence of a character in a list.
Definition: irrUString.h:1930
const_iterator begin() const
Returns an iterator to the beginning of the string.
Definition: irrUString.h:2742
u32 split(container &ret, const ustring16< TAlloc > &c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
Split the ustring16 into parts.
Definition: irrUString.h:2597
ustring16< TAlloc > & append(const ustring16< TAlloc > &other, u32 length)
Appends a certain amount of characters of a ustring16 to this ustring16.
Definition: irrUString.h:1737
bool equalsn(const uchar16_t *const str, u32 n) const
Compares the first n characters of this string with another.
Definition: irrUString.h:1348
core::array< uchar32_t > toUTF32(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Converts the string to a UTF-32 encoded string array.
Definition: irrUString.h:3045
ustring16< TAlloc > & operator=(const uchar8_t *const c)
Assignment operator for UTF-8 strings.
Definition: irrUString.h:1137
s32 findLast(uchar32_t c, s32 start=-1) const
Finds last occurrence of character.
Definition: irrUString.h:1907
u32 split(container &ret, const uchar32_t *const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
Split the ustring16 into parts.
Definition: irrUString.h:2540
ustring16(const ustring16< TAlloc > &other)
Constructor.
Definition: irrUString.h:832
ustring16(const uchar8_t *const c)
Constructor for copying a UTF-8 string from a pointer.
Definition: irrUString.h:924
bool empty() const
Informs if the ustring is empty or not.
Definition: irrUString.h:1312
void reserve(u32 count)
Reserves some memory.
Definition: irrUString.h:1767
const_iterator cbegin() const
Returns an iterator to the beginning of the string.
Definition: irrUString.h:2751
ustring16< TAlloc > & erase(u32 index)
Erases a character from the ustring16.
Definition: irrUString.h:2439
ustring16(const char *const c, u32 length)
Constructor for copying a character string from a pointer with a given length.
Definition: irrUString.h:909
ustring16< TAlloc > & replace(const ustring16< TAlloc > &toReplace, const ustring16< TAlloc > &replaceWith)
Replaces all instances of a string with another one.
Definition: irrUString.h:2188
ustring16< TAlloc > & operator+=(char c)
Appends a character to this ustring16.
Definition: irrUString.h:2057
~ustring16()
Destructor.
Definition: irrUString.h:1073
core::array< wchar_t > toWCHAR(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Converts the string to a wchar_t encoded string array.
Definition: irrUString.h:3140
ustring16()
End of iterator classes.
Definition: irrUString.h:818
iterator end()
Returns an iterator to the end of the string.
Definition: irrUString.h:2760
ustring16< TAlloc > & append(const uchar8_t *const other, u32 length=0xffffffff)
Appends a UTF-8 string to this ustring16.
Definition: irrUString.h:1397
s32 findNext(uchar32_t c, u32 startPos) const
Finds next occurrence of character.
Definition: irrUString.h:1885
ustring16< TAlloc > & trim(const ustring16< TAlloc > &whitespace=" \t\n\r")
Trims the ustring16.
Definition: irrUString.h:2420
core::string< wchar_t > toWCHAR_s(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Converts the string to a wchar_t encoded string.
Definition: irrUString.h:3100
u32 size_raw() const
Returns the raw number of UTF-16 code units in the string, counting each surrogate individually.
Definition: irrUString.h:2614
ustring16 & operator=(const ustring16< TAlloc > &other)
Assignment operator.
Definition: irrUString.h:1080
ustring16(const wchar_t *const c, u32 length)
Constructor for copying a wchar_t string from a pointer with a given length.
Definition: irrUString.h:1041
s32 find_raw(const ustring16< TAlloc > &str, const u32 start=0) const
Finds another ustring16 in this ustring16.
Definition: irrUString.h:1995
ustring16< TAlloc > & append(const ustring16< TAlloc > &other)
Appends a ustring16 to this ustring16.
Definition: irrUString.h:1714
ustring16< TAlloc > & insert(uchar32_t c, u32 pos)
Inserts a character into the string.
Definition: irrUString.h:2624
ustring16< TAlloc >::_ustring16_iterator iterator
Definition: irrUString.h:810
ustring16< TAlloc > & append(uchar32_t character)
Appends a character to this ustring16.
Definition: irrUString.h:1366
ustring16< TAlloc > & operator=(const wchar_t *const c)
Assignment operator for wchar_t strings.
Definition: irrUString.h:1194
bool operator!=(const uchar16_t *const str) const
Inequality operator.
Definition: irrUString.h:1282
const uchar16_t * c_str() const
Returns a pointer to the raw UTF-16 string data.
Definition: irrUString.h:1320
ustring16(const char *const c)
Constructor for copying a character string from a pointer.
Definition: irrUString.h:894
s32 findFirstCharNotInList(const uchar32_t *const c, u32 count=1) const
Finds first position of a character not in a given list.
Definition: irrUString.h:1826
u32 allocated
Definition: irrUString.h:3275
ustring16(const std::basic_string< B, A, Alloc > &other)
Constructor from std::string.
Definition: irrUString.h:861
ustring16(const uchar32_t *const c)
Constructor for copying a UTF-32 string from a pointer.
Definition: irrUString.h:994
ustring16< TAlloc > & operator=(const string< B, A > &other)
Assignment operator for other string types.
Definition: irrUString.h:1129
s32 findLastCharNotInList(const uchar32_t *const c, u32 count=1) const
Finds last position of a character not in a given list.
Definition: irrUString.h:1855
const_iterator end() const
Returns an iterator to the end of the string.
Definition: irrUString.h:2770
ustring16(const uchar16_t *const c, u32 length)
Constructor for copying a UTF-16 string from a pointer with a given length.
Definition: irrUString.h:980
io::path toPATH_s(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Converts the string to a properly encoded io::path string.
Definition: irrUString.h:3185
s32 findFirst(uchar32_t c) const
Finds first occurrence of character.
Definition: irrUString.h:1779
uchar32_t lastChar() const
Gets the last char of the ustring16, or 0.
Definition: irrUString.h:2496
ustring16(const char c)
Constructor for copying a UTF-8 string from a single char.
Definition: irrUString.h:938
ustring16< TAlloc > & replace_raw(uchar16_t c, u32 pos)
Replaces a character in the string.
Definition: irrUString.h:2724
access operator[](const u32 index)
Direct access operator.
Definition: irrUString.h:1224
const_iterator cend() const
Returns an iterator to the end of the string.
Definition: irrUString.h:2780
ustring16(const wchar_t *const c)
Constructor for copying a wchar_t string from a pointer.
Definition: irrUString.h:1022
core::string< uchar8_t > toUTF8_s(const bool addBOM=false) const
Converts the string to a UTF-8 encoded string.
Definition: irrUString.h:2791
ustring16< TAlloc > & append(const uchar16_t *const other, u32 length=0xffffffff)
Appends a UTF-16 string to this ustring16.
Definition: irrUString.h:1573
core::array< uchar16_t > toUTF16(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Converts the string to a UTF-16 encoded string array.
Definition: irrUString.h:2948
TAlloc allocator
Definition: irrUString.h:3277
unicode::EUTF_ENCODE encoding
Definition: irrUString.h:3274
ustring16< TAlloc > & operator=(const uchar32_t *const c)
Assignment operator for UTF-32 strings.
Definition: irrUString.h:1173
const unicode::EUTF_ENDIAN getEndianness() const
Gets the endianness of the Unicode string this class contains.
Definition: irrUString.h:3239
iterator begin()
Returns an iterator to the beginning of the string.
Definition: irrUString.h:2733
static const irr::u16 UTF16_LO_SURROGATE
Definition: irrUString.h:76
static const irr::u16 UTF16_HI_SURROGATE
UTF-16 surrogate start values.
Definition: irrUString.h:75
#define UTF16_IS_SURROGATE(c)
Is a UTF-16 code point a surrogate?
Definition: irrUString.h:79
#define COPY_ARRAY(source, size)
#define UTF16_IS_SURROGATE_LO(c)
Definition: irrUString.h:81
#define UTF16_IS_SURROGATE_HI(c)
Definition: irrUString.h:80
const irr::u16 UTF_REPLACEMENT_CHARACTER
The unicode replacement character. Used to replace invalid characters.
Definition: irrUString.h:105
EUTF_ENCODE determineUnicodeBOM(const char *data)
Detects if the given data stream starts with a unicode BOM.
Definition: irrUString.h:233
core::array< u8 > getUnicodeBOM(EUTF_ENCODE mode)
Returns the specified unicode byte order mark in a byte array.
Definition: irrUString.h:182
const u8 BOM_ENCODE_UTF32_LEN
Definition: irrUString.h:154
const u8 BOM_ENCODE_UTF16_LE[2]
Definition: irrUString.h:147
const u8 BOM_ENCODE_UTF16_LEN
Definition: irrUString.h:153
const u8 BOM_ENCODE_UTF8[3]
Unicode byte order marks for file operations.
Definition: irrUString.h:145
const u8 BOM_UTF8_LEN
The size of the Unicode byte order mark in terms of the Unicode character size.
Definition: irrUString.h:140
uchar32_t toUTF32(uchar16_t high, uchar16_t low)
Convert a UTF-16 surrogate pair into a UTF-32 character.
Definition: irrUString.h:111
const u16 BOM
The Unicode byte order mark.
Definition: irrUString.h:137
const u8 BOM_UTF16_LEN
Definition: irrUString.h:141
const u8 BOM_ENCODE_UTF16_BE[2]
Definition: irrUString.h:146
const u8 BOM_ENCODE_UTF32_LE[4]
Definition: irrUString.h:149
const u8 BOM_ENCODE_UTF8_LEN
The size in bytes of the Unicode byte order marks for file operations.
Definition: irrUString.h:152
EUTF_ENDIAN
Unicode endianness.
Definition: irrUString.h:171
@ EUTFEE_NATIVE
Definition: irrUString.h:172
@ EUTFEE_LITTLE
Definition: irrUString.h:173
@ EUTFEE_BIG
Definition: irrUString.h:174
EUTF_ENCODE
Unicode encoding type.
Definition: irrUString.h:158
@ EUTFE_UTF16_BE
Definition: irrUString.h:163
@ EUTFE_UTF16_LE
Definition: irrUString.h:162
@ EUTFE_UTF32
Definition: irrUString.h:164
@ EUTFE_UTF32_LE
Definition: irrUString.h:165
@ EUTFE_NONE
Definition: irrUString.h:159
@ EUTFE_UTF32_BE
Definition: irrUString.h:166
@ EUTFE_UTF16
Definition: irrUString.h:161
@ EUTFE_UTF8
Definition: irrUString.h:160
uchar16_t swapEndian16(const uchar16_t &c)
Swaps the endianness of a 16-bit value.
Definition: irrUString.h:121
const u8 BOM_ENCODE_UTF32_BE[4]
Definition: irrUString.h:148
const u8 BOM_UTF32_LEN
Definition: irrUString.h:142
uchar32_t swapEndian32(const uchar32_t &c)
Swaps the endianness of a 32-bit value.
Definition: irrUString.h:128
ustring16< TAlloc > operator+(const ustring16< TAlloc > &left, const ustring16< TAlloc > &right)
Concatenates two ustring16s.
Definition: irrUString.h:3286
std::ostream & operator<<(std::ostream &out, const ustring16< TAlloc > &in)
Writes a ustring16 to an ostream.
Definition: irrUString.h:3842
ustring16< irrAllocator< uchar16_t > > ustring
Definition: irrUString.h:3281
Definition: clouds.h:32
u16 uchar16_t
Definition: irrUString.h:94
u32 uchar32_t
Definition: irrUString.h:93
u8 uchar8_t
Definition: irrUString.h:95
std::string p(std::string path)
Definition: test_filepath.cpp:59