123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- * Copyright (C) 2010-2012, International Business Machines
- * Corporation and others. All Rights Reserved.
- *******************************************************************************
- * file name: idna.h
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2010mar05
- * created by: Markus W. Scherer
- */
- #ifndef __IDNA_H__
- #define __IDNA_H__
- /**
- * \file
- * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
- */
- #include "unicode/utypes.h"
- #if U_SHOW_CPLUSPLUS_API
- #if !UCONFIG_NO_IDNA
- #include "unicode/bytestream.h"
- #include "unicode/stringpiece.h"
- #include "unicode/uidna.h"
- #include "unicode/unistr.h"
- U_NAMESPACE_BEGIN
- class IDNAInfo;
- /**
- * Abstract base class for IDNA processing.
- * See http://www.unicode.org/reports/tr46/
- * and http://www.ietf.org/rfc/rfc3490.txt
- *
- * The IDNA class is not intended for public subclassing.
- *
- * This C++ API currently only implements UTS #46.
- * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
- * and IDNA2003 (functions that do not use a service object).
- * @stable ICU 4.6
- */
- class U_COMMON_API IDNA : public UObject {
- public:
- /**
- * Destructor.
- * @stable ICU 4.6
- */
- ~IDNA();
- /**
- * Returns an IDNA instance which implements UTS #46.
- * Returns an unmodifiable instance, owned by the caller.
- * Cache it for multiple operations, and delete it when done.
- * The instance is thread-safe, that is, it can be used concurrently.
- *
- * UTS #46 defines Unicode IDNA Compatibility Processing,
- * updated to the latest version of Unicode and compatible with both
- * IDNA2003 and IDNA2008.
- *
- * The worker functions use transitional processing, including deviation mappings,
- * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
- * is used in which case the deviation characters are passed through without change.
- *
- * Disallowed characters are mapped to U+FFFD.
- *
- * For available options see the uidna.h header.
- * Operations with the UTS #46 instance do not support the
- * UIDNA_ALLOW_UNASSIGNED option.
- *
- * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
- * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
- * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
- *
- * @param options Bit set to modify the processing and error checking.
- * See option bit set values in uidna.h.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return the UTS #46 IDNA instance, if successful
- * @stable ICU 4.6
- */
- static IDNA *
- createUTS46Instance(uint32_t options, UErrorCode &errorCode);
- /**
- * Converts a single domain name label into its ASCII form for DNS lookup.
- * If any processing step fails, then info.hasErrors() will be true and
- * the result might not be an ASCII string.
- * The label might be modified according to the types of errors.
- * Labels with severe errors will be left in (or turned into) their Unicode form.
- *
- * The UErrorCode indicates an error only in exceptional cases,
- * such as a U_MEMORY_ALLOCATION_ERROR.
- *
- * @param label Input domain name label
- * @param dest Destination string object
- * @param info Output container of IDNA processing details.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return dest
- * @stable ICU 4.6
- */
- virtual UnicodeString &
- labelToASCII(const UnicodeString &label, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const = 0;
- /**
- * Converts a single domain name label into its Unicode form for human-readable display.
- * If any processing step fails, then info.hasErrors() will be true.
- * The label might be modified according to the types of errors.
- *
- * The UErrorCode indicates an error only in exceptional cases,
- * such as a U_MEMORY_ALLOCATION_ERROR.
- *
- * @param label Input domain name label
- * @param dest Destination string object
- * @param info Output container of IDNA processing details.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return dest
- * @stable ICU 4.6
- */
- virtual UnicodeString &
- labelToUnicode(const UnicodeString &label, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const = 0;
- /**
- * Converts a whole domain name into its ASCII form for DNS lookup.
- * If any processing step fails, then info.hasErrors() will be true and
- * the result might not be an ASCII string.
- * The domain name might be modified according to the types of errors.
- * Labels with severe errors will be left in (or turned into) their Unicode form.
- *
- * The UErrorCode indicates an error only in exceptional cases,
- * such as a U_MEMORY_ALLOCATION_ERROR.
- *
- * @param name Input domain name
- * @param dest Destination string object
- * @param info Output container of IDNA processing details.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return dest
- * @stable ICU 4.6
- */
- virtual UnicodeString &
- nameToASCII(const UnicodeString &name, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const = 0;
- /**
- * Converts a whole domain name into its Unicode form for human-readable display.
- * If any processing step fails, then info.hasErrors() will be true.
- * The domain name might be modified according to the types of errors.
- *
- * The UErrorCode indicates an error only in exceptional cases,
- * such as a U_MEMORY_ALLOCATION_ERROR.
- *
- * @param name Input domain name
- * @param dest Destination string object
- * @param info Output container of IDNA processing details.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return dest
- * @stable ICU 4.6
- */
- virtual UnicodeString &
- nameToUnicode(const UnicodeString &name, UnicodeString &dest,
- IDNAInfo &info, UErrorCode &errorCode) const = 0;
- // UTF-8 versions of the processing methods ---------------------------- ***
- /**
- * Converts a single domain name label into its ASCII form for DNS lookup.
- * UTF-8 version of labelToASCII(), same behavior.
- *
- * @param label Input domain name label
- * @param dest Destination byte sink; Flush()ed if successful
- * @param info Output container of IDNA processing details.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return dest
- * @stable ICU 4.6
- */
- virtual void
- labelToASCII_UTF8(StringPiece label, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
- /**
- * Converts a single domain name label into its Unicode form for human-readable display.
- * UTF-8 version of labelToUnicode(), same behavior.
- *
- * @param label Input domain name label
- * @param dest Destination byte sink; Flush()ed if successful
- * @param info Output container of IDNA processing details.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return dest
- * @stable ICU 4.6
- */
- virtual void
- labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
- /**
- * Converts a whole domain name into its ASCII form for DNS lookup.
- * UTF-8 version of nameToASCII(), same behavior.
- *
- * @param name Input domain name
- * @param dest Destination byte sink; Flush()ed if successful
- * @param info Output container of IDNA processing details.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return dest
- * @stable ICU 4.6
- */
- virtual void
- nameToASCII_UTF8(StringPiece name, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
- /**
- * Converts a whole domain name into its Unicode form for human-readable display.
- * UTF-8 version of nameToUnicode(), same behavior.
- *
- * @param name Input domain name
- * @param dest Destination byte sink; Flush()ed if successful
- * @param info Output container of IDNA processing details.
- * @param errorCode Standard ICU error code. Its input value must
- * pass the U_SUCCESS() test, or else the function returns
- * immediately. Check for U_FAILURE() on output or use with
- * function chaining. (See User Guide for details.)
- * @return dest
- * @stable ICU 4.6
- */
- virtual void
- nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
- IDNAInfo &info, UErrorCode &errorCode) const;
- };
- class UTS46;
- /**
- * Output container for IDNA processing errors.
- * The IDNAInfo class is not suitable for subclassing.
- * @stable ICU 4.6
- */
- class U_COMMON_API IDNAInfo : public UMemory {
- public:
- /**
- * Constructor for stack allocation.
- * @stable ICU 4.6
- */
- IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
- /**
- * Were there IDNA processing errors?
- * @return true if there were processing errors
- * @stable ICU 4.6
- */
- UBool hasErrors() const { return errors!=0; }
- /**
- * Returns a bit set indicating IDNA processing errors.
- * See UIDNA_ERROR_... constants in uidna.h.
- * @return bit set of processing errors
- * @stable ICU 4.6
- */
- uint32_t getErrors() const { return errors; }
- /**
- * Returns true if transitional and nontransitional processing produce different results.
- * This is the case when the input label or domain name contains
- * one or more deviation characters outside a Punycode label (see UTS #46).
- * <ul>
- * <li>With nontransitional processing, such characters are
- * copied to the destination string.
- * <li>With transitional processing, such characters are
- * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
- * </ul>
- * @return true if transitional and nontransitional processing produce different results
- * @stable ICU 4.6
- */
- UBool isTransitionalDifferent() const { return isTransDiff; }
- private:
- friend class UTS46;
- IDNAInfo(const IDNAInfo &other) = delete; // no copying
- IDNAInfo &operator=(const IDNAInfo &other) = delete; // no copying
- void reset() {
- errors=labelErrors=0;
- isTransDiff=false;
- isBiDi=false;
- isOkBiDi=true;
- }
- uint32_t errors, labelErrors;
- UBool isTransDiff;
- UBool isBiDi;
- UBool isOkBiDi;
- };
- U_NAMESPACE_END
- #endif // UCONFIG_NO_IDNA
- #endif /* U_SHOW_CPLUSPLUS_API */
- #endif // __IDNA_H__
|