csdetect.h 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 2005-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. */
  9. #ifndef __CSDETECT_H
  10. #define __CSDETECT_H
  11. #include "unicode/uobject.h"
  12. #if !UCONFIG_NO_CONVERSION
  13. #include "unicode/uenum.h"
  14. U_NAMESPACE_BEGIN
  15. class InputText;
  16. class CharsetRecognizer;
  17. class CharsetMatch;
  18. class CharsetDetector : public UMemory
  19. {
  20. private:
  21. InputText *textIn;
  22. CharsetMatch **resultArray;
  23. int32_t resultCount;
  24. UBool fStripTags; // If true, setText() will strip tags from input text.
  25. UBool fFreshTextSet;
  26. static void setRecognizers(UErrorCode &status);
  27. UBool *fEnabledRecognizers; // If not null, active set of charset recognizers had
  28. // been changed from the default. The array index is
  29. // corresponding to fCSRecognizers. See setDetectableCharset().
  30. public:
  31. CharsetDetector(UErrorCode &status);
  32. ~CharsetDetector();
  33. void setText(const char *in, int32_t len);
  34. const CharsetMatch * const *detectAll(int32_t &maxMatchesFound, UErrorCode &status);
  35. const CharsetMatch *detect(UErrorCode& status);
  36. void setDeclaredEncoding(const char *encoding, int32_t len) const;
  37. UBool setStripTagsFlag(UBool flag);
  38. UBool getStripTagsFlag() const;
  39. // const char *getCharsetName(int32_t index, UErrorCode& status) const;
  40. static int32_t getDetectableCount();
  41. static UEnumeration * getAllDetectableCharsets(UErrorCode &status);
  42. UEnumeration * getDetectableCharsets(UErrorCode &status) const;
  43. void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status);
  44. };
  45. U_NAMESPACE_END
  46. #endif
  47. #endif /* __CSDETECT_H */