/* utf.h */
/*
 * The authors of this software are Rob Pike and Ken Thompson.
 * Copyright (c) 2002 by Lucent Technologies.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 *
 * This file and rune.cc have been converted to compile as C++ code
 * in name space re2.
 */
  17. #ifndef UTIL_UTF_H_
  18. #define UTIL_UTF_H_
  19. #include <stdint.h>
  20. namespace re2 {
  21. typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/
  22. enum
  23. {
  24. UTFmax = 4, /* maximum bytes per rune */
  25. Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
  26. Runeself = 0x80, /* rune and UTF sequences are the same (<) */
  27. Runeerror = 0xFFFD, /* decoding error in UTF */
  28. Runemax = 0x10FFFF, /* maximum rune value */
  29. };
  30. int runetochar(char* s, const Rune* r);
  31. int chartorune(Rune* r, const char* s);
  32. int fullrune(const char* s, int n);
  33. int utflen(const char* s);
  34. char* utfrune(const char*, Rune);
  35. } // namespace re2
  36. #endif // UTIL_UTF_H_