measunit_impl.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. // © 2020 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #ifndef __MEASUNIT_IMPL_H__
  4. #define __MEASUNIT_IMPL_H__
  5. #include "unicode/utypes.h"
  6. #if !UCONFIG_NO_FORMATTING
  7. #include "unicode/measunit.h"
  8. #include "cmemory.h"
  9. #include "charstr.h"
  10. U_NAMESPACE_BEGIN
  // Forward declaration of number::impl::LongNameHandler. MeasureUnitImpl
  // (declared later in this header) names this class as a friend.
  namespace number { namespace impl { class LongNameHandler; } }

  // The default currency code, "XXX", as a UTF-16 (char16_t) string.
  static const char16_t kDefaultCurrency[] = u"XXX";

  // The same default currency code, "XXX", as a plain char string.
  static const char kDefaultCurrency8[] = "XXX";
  18. /**
  19. * Looks up the "unitQuantity" (aka "type" or "category") of a base unit
  20. * identifier. The category is returned via `result`, which must initially be
  21. * empty.
  22. *
  23. * This only supports base units: other units must be resolved to base units
  24. * before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be
  25. * returned.
  26. *
  27. * Categories are found in `unitQuantities` in the `units` resource (see
  28. * `units.txt`).
  29. */
  30. // TODO: make this function accepts any `MeasureUnit` as Java and move it to the `UnitsData` class.
  31. CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status);
  32. /**
  33. * A struct representing a single unit (optional SI or binary prefix, and dimensionality).
  34. */
  35. struct U_I18N_API SingleUnitImpl : public UMemory {
  36. /**
  37. * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error
  38. * code and returns the base dimensionless unit. Parses if necessary.
  39. */
  40. static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status);
  41. /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */
  42. MeasureUnit build(UErrorCode& status) const;
  43. /**
  44. * Returns the "simple unit ID", without SI or dimensionality prefix: this
  45. * instance may represent a square-kilometer, but only "meter" will be
  46. * returned.
  47. *
  48. * The returned pointer points at memory that exists for the duration of the
  49. * program's running.
  50. */
  51. const char *getSimpleUnitID() const;
  52. /**
  53. * Generates and append a neutral identifier string for a single unit which means we do not include
  54. * the dimension signal.
  55. */
  56. void appendNeutralIdentifier(CharString &result, UErrorCode &status) const;
  57. /**
  58. * Returns the index of this unit's "quantity" in unitQuantities (in
  59. * measunit_extra.cpp). The value of this index determines sort order for
  60. * normalization of unit identifiers.
  61. */
  62. int32_t getUnitCategoryIndex() const;
  63. /**
  64. * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
  65. * sorting and coalescing.
  66. *
  67. * Sort order of units is specified by UTS #35
  68. * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization).
  69. *
  70. * Takes the sign of dimensionality into account, but not the absolute
  71. * value: per-meter is not considered the same as meter, but meter is
  72. * considered the same as square-meter.
  73. *
  74. * The dimensionless unit generally does not get compared, but if it did, it
  75. * would sort before other units by virtue of index being < 0 and
  76. * dimensionality not being negative.
  77. */
  78. int32_t compareTo(const SingleUnitImpl& other) const {
  79. if (dimensionality < 0 && other.dimensionality > 0) {
  80. // Positive dimensions first
  81. return 1;
  82. }
  83. if (dimensionality > 0 && other.dimensionality < 0) {
  84. return -1;
  85. }
  86. // Sort by official quantity order
  87. int32_t thisQuantity = this->getUnitCategoryIndex();
  88. int32_t otherQuantity = other.getUnitCategoryIndex();
  89. if (thisQuantity < otherQuantity) {
  90. return -1;
  91. }
  92. if (thisQuantity > otherQuantity) {
  93. return 1;
  94. }
  95. // If quantity order didn't help, then we go by index.
  96. if (index < other.index) {
  97. return -1;
  98. }
  99. if (index > other.index) {
  100. return 1;
  101. }
  102. // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can
  103. // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can
  104. // compare the bases.
  105. // NOTE: this methodology will fail if the binary prefix more than or equal 98.
  106. int32_t unitBase = umeas_getPrefixBase(unitPrefix);
  107. int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix);
  108. // Values for comparison purposes only.
  109. int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3
  110. : umeas_getPrefixPower(unitPrefix);
  111. int32_t otherUnitPower =
  112. otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3
  113. : umeas_getPrefixPower(other.unitPrefix);
  114. // NOTE: if the unitPower is less than the other,
  115. // we return 1 not -1. Thus because we want th sorting order
  116. // for the bigger prefix to be before the smaller.
  117. // Example: megabyte should come before kilobyte.
  118. if (unitPower < otherUnitPower) {
  119. return 1;
  120. }
  121. if (unitPower > otherUnitPower) {
  122. return -1;
  123. }
  124. if (unitBase < otherUnitBase) {
  125. return 1;
  126. }
  127. if (unitBase > otherUnitBase) {
  128. return -1;
  129. }
  130. return 0;
  131. }
  132. /**
  133. * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing.
  134. *
  135. * Units with the same base unit and SI or binary prefix should match, except that they must also
  136. * have the same dimensionality sign, such that we don't merge numerator and denominator.
  137. */
  138. bool isCompatibleWith(const SingleUnitImpl& other) const {
  139. return (compareTo(other) == 0);
  140. }
  141. /**
  142. * Returns true if this unit is the "dimensionless base unit", as produced
  143. * by the MeasureUnit() default constructor. (This does not include the
  144. * likes of concentrations or angles.)
  145. */
  146. bool isDimensionless() const {
  147. return index == -1;
  148. }
  149. /**
  150. * Simple unit index, unique for every simple unit, -1 for the dimensionless
  151. * unit. This is an index into a string list in measunit_extra.cpp, as
  152. * loaded by SimpleUnitIdentifiersSink.
  153. *
  154. * The default value is -1, meaning the dimensionless unit:
  155. * isDimensionless() will return true, until index is changed.
  156. */
  157. int32_t index = -1;
  158. /**
  159. * SI or binary prefix.
  160. *
  161. * This is ignored for the dimensionless unit.
  162. */
  163. UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE;
  164. /**
  165. * Dimensionality.
  166. *
  167. * This is meaningless for the dimensionless unit.
  168. */
  169. int32_t dimensionality = 1;
  170. };
  171. // Forward declaration
  172. struct MeasureUnitImplWithIndex;
  173. // Export explicit template instantiations of MaybeStackArray, MemoryPool and
  174. // MaybeStackVector. This is required when building DLLs for Windows. (See
  175. // datefmt.h, collationiterator.h, erarules.h and others for similar examples.)
  176. #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
  177. template class U_I18N_API MaybeStackArray<SingleUnitImpl *, 8>;
  178. template class U_I18N_API MemoryPool<SingleUnitImpl, 8>;
  179. template class U_I18N_API MaybeStackVector<SingleUnitImpl, 8>;
  180. #endif
  181. /**
  182. * Internal representation of measurement units. Capable of representing all complexities of units,
  183. * including mixed and compound units.
  184. */
  185. class U_I18N_API MeasureUnitImpl : public UMemory {
  186. public:
  187. MeasureUnitImpl() = default;
  188. MeasureUnitImpl(MeasureUnitImpl &&other) = default;
  189. // No copy constructor, use MeasureUnitImpl::copy() to make it explicit.
  190. MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete;
  191. MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status);
  192. MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default;
  193. /** Extract the MeasureUnitImpl from a MeasureUnit. */
  194. static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) {
  195. return measureUnit.fImpl;
  196. }
  197. /**
  198. * Parse a unit identifier into a MeasureUnitImpl.
  199. *
  200. * @param identifier The unit identifier string.
  201. * @param status Set if the identifier string is not valid.
  202. * @return A newly parsed value object. Behaviour of this unit is
  203. * unspecified if an error is returned via status.
  204. */
  205. static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status);
  206. /**
  207. * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present.
  208. *
  209. * @param measureUnit The source MeasureUnit.
  210. * @param memory A place to write the new MeasureUnitImpl if parsing is required.
  211. * @param status Set if an error occurs.
  212. * @return A reference to either measureUnit.fImpl or memory.
  213. */
  214. static const MeasureUnitImpl& forMeasureUnit(
  215. const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status);
  216. /**
  217. * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present.
  218. *
  219. * @param measureUnit The source MeasureUnit.
  220. * @param status Set if an error occurs.
  221. * @return A value object, either newly parsed or copied from measureUnit.
  222. */
  223. static MeasureUnitImpl forMeasureUnitMaybeCopy(
  224. const MeasureUnit& measureUnit, UErrorCode& status);
  225. /**
  226. * Used for currency units.
  227. */
  228. static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode) {
  229. MeasureUnitImpl result;
  230. UErrorCode localStatus = U_ZERO_ERROR;
  231. result.identifier.append(currencyCode, localStatus);
  232. // localStatus is not expected to fail since currencyCode should be 3 chars long
  233. return result;
  234. }
  235. /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */
  236. MeasureUnit build(UErrorCode& status) &&;
  237. /**
  238. * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit.
  239. */
  240. MeasureUnitImpl copy(UErrorCode& status) const;
  241. /**
  242. * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices.
  243. * For example:
  244. * - if the `MeasureUnitImpl` is `foot-per-hour`
  245. * it will return a list of 1 {(0, `foot-per-hour`)}
  246. * - if the `MeasureUnitImpl` is `foot-and-inch`
  247. * it will return a list of 2 {(0, `foot`), (1, `inch`)}
  248. */
  249. MaybeStackVector<MeasureUnitImplWithIndex>
  250. extractIndividualUnitsWithIndices(UErrorCode &status) const;
  251. /** Mutates this MeasureUnitImpl to take the reciprocal. */
  252. void takeReciprocal(UErrorCode& status);
  253. /**
  254. * Returns a simplified version of the unit.
  255. * NOTE: the simplification happen when there are two units equals in their base unit and their
  256. * prefixes.
  257. *
  258. * Example 1: "square-meter-per-meter" --> "meter"
  259. * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter"
  260. */
  261. MeasureUnitImpl copyAndSimplify(UErrorCode &status) const;
  262. /**
  263. * Mutates this MeasureUnitImpl to append a single unit.
  264. *
  265. * @return true if a new item was added. If unit is the dimensionless unit,
  266. * it is never added: the return value will always be false.
  267. */
  268. bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status);
  269. /**
  270. * Normalizes a MeasureUnitImpl and generate the identifier string in place.
  271. */
  272. void serialize(UErrorCode &status);
  273. /** The complexity, either SINGLE, COMPOUND, or MIXED. */
  274. UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE;
  275. /**
  276. * The list of single units. These may be summed or multiplied, based on the
  277. * value of the complexity field.
  278. *
  279. * The "dimensionless" unit (SingleUnitImpl default constructor) must not be
  280. * added to this list.
  281. */
  282. MaybeStackVector<SingleUnitImpl> singleUnits;
  283. /**
  284. * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed.
  285. */
  286. CharString identifier;
  287. // For calling serialize
  288. // TODO(icu-units#147): revisit serialization
  289. friend class number::impl::LongNameHandler;
  290. };
  291. struct U_I18N_API MeasureUnitImplWithIndex : public UMemory {
  292. const int32_t index;
  293. MeasureUnitImpl unitImpl;
  294. // Makes a copy of unitImpl.
  295. MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status)
  296. : index(index), unitImpl(unitImpl.copy(status)) {
  297. }
  298. MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status)
  299. : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) {
  300. }
  301. };
  302. // Export explicit template instantiations of MaybeStackArray, MemoryPool and
  303. // MaybeStackVector. This is required when building DLLs for Windows. (See
  304. // datefmt.h, collationiterator.h, erarules.h and others for similar examples.)
  305. #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
  306. template class U_I18N_API MaybeStackArray<MeasureUnitImplWithIndex *, 8>;
  307. template class U_I18N_API MemoryPool<MeasureUnitImplWithIndex, 8>;
  308. template class U_I18N_API MaybeStackVector<MeasureUnitImplWithIndex, 8>;
  309. // Export an explicit template instantiation of the LocalPointer that is used as a
  310. // data member of MeasureUnitImpl.
  311. // (When building DLLs for Windows this is required.)
  312. #if defined(_MSC_VER)
  313. // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!=
  314. #pragma warning(push)
  315. #pragma warning(disable : 4661)
  316. #endif
  317. template class U_I18N_API LocalPointerBase<MeasureUnitImpl>;
  318. template class U_I18N_API LocalPointer<MeasureUnitImpl>;
  319. #if defined(_MSC_VER)
  320. #pragma warning(pop)
  321. #endif
  322. #endif
  323. U_NAMESPACE_END
  324. #endif /* #if !UCONFIG_NO_FORMATTING */
  325. #endif //__MEASUNIT_IMPL_H__