12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899 |
- Network Working Group C. Newman
- Request for Comments: 2192 Innosoft
- Category: Standards Track September 1997
- IMAP URL Scheme
- Status of this memo
- This document specifies an Internet standards track protocol for
- the Internet community, and requests discussion and suggestions for
- improvements. Please refer to the current edition of the "Internet
- Official Protocol Standards" (STD 1) for the standardization state
- and status of this protocol. Distribution of this memo is
- unlimited.
- Abstract
- IMAP [IMAP4] is a rich protocol for accessing remote message
- stores. It provides an ideal mechanism for accessing public
- mailing list archives as well as private and shared message stores.
- This document defines a URL scheme for referencing objects on an
- IMAP server.
- 1. Conventions used in this document
- The key words "MUST", "MUST NOT", "SHOULD", "SHOULD NOT", and "MAY"
- in this document are to be interpreted as defined in "Key words for
- use in RFCs to Indicate Requirement Levels" [KEYWORDS].
- 2. IMAP scheme
- The IMAP URL scheme is used to designate IMAP servers, mailboxes,
- messages, MIME bodies [MIME], and search programs on Internet hosts
- accessible using the IMAP protocol.
- The IMAP URL follows the common Internet scheme syntax as defined
- in RFC 1738 [BASIC-URL] except that clear text passwords are not
- permitted. If :<port> is omitted, the port defaults to 143.
- Newman Standards Track [Page 1]
- RFC 2192 IMAP URL Scheme September 1997
- An IMAP URL takes one of the following forms:
- imap://<iserver>/
- imap://<iserver>/<enc_list_mailbox>;TYPE=<list_type>
- imap://<iserver>/<enc_mailbox>[uidvalidity][?<enc_search>]
- imap://<iserver>/<enc_mailbox>[uidvalidity]<iuid>[isection]
- The first form is used to refer to an IMAP server, the second form
- refers to a list of mailboxes, the third form refers to the
- contents of a mailbox or a set of messages resulting from a search,
- and the final form refers to a specific message or message part.
- Note that the syntax here is informal. The authoritative formal
- syntax for IMAP URLs is defined in section 12.
- 3. IMAP User Name and Authentication Mechanism
- A user name and/or authentication mechanism may be supplied. They
- are used in the "LOGIN" or "AUTHENTICATE" commands after making the
- connection to the IMAP server. If no user name or authentication
- mechanism is supplied, the user name "anonymous" is used with the
- "LOGIN" command and the password is supplied as the Internet e-mail
- address of the end user accessing the resource. If the URL doesn't
- supply a user name, the program interpreting the IMAP URL SHOULD
- request one from the user if necessary.
- An authentication mechanism can be expressed by adding
- ";AUTH=<enc_auth_type>" to the end of the user name. When such an
- <enc_auth_type> is indicated, the client SHOULD request appropriate
- credentials from that mechanism and use the "AUTHENTICATE" command
- instead of the "LOGIN" command. If no user name is specified, one
- SHOULD be obtained from the mechanism or requested from the user as
- appropriate.
- The string ";AUTH=*" indicates that the client SHOULD select an
- appropriate authentication mechanism. It MAY use any mechanism
- listed in the CAPABILITY command or use an out of band security
- service resulting in a PREAUTH connection. If no user name is
- specified and no appropriate authentication mechanisms are
- available, the client SHOULD fall back to anonymous login as
- described above. This allows a URL which grants read-write access
- to authorized users, and read-only anonymous access to other users.
- If a user name is included with no authentication mechanism, then
- ";AUTH=*" is assumed.
- Newman Standards Track [Page 2]
- RFC 2192 IMAP URL Scheme September 1997
- Since URLs can easily come from untrusted sources, care must be
- taken when resolving a URL which requires or requests any sort of
- authentication. If authentication credentials are supplied to the
- wrong server, it may compromise the security of the user's account.
- The program resolving the URL should make sure it meets at least
- one of the following criteria in this case:
- (1) The URL comes from a trusted source, such as a referral server
- which the client has validated and trusts according to site policy.
- Note that user entry of the URL may or may not count as a trusted
- source, depending on the experience level of the user and site
- policy.
- (2) Explicit local site policy permits the client to connect to the
- server in the URL. For example, if the client knows the site
- domain name, site policy may dictate that any hostname ending in
- that domain is trusted.
- (3) The user confirms that connecting to that domain name with the
- specified credentials and/or mechanism is permitted.
- (4) A mechanism is used which validates the server before passing
- potentially compromising client credentials.
- (5) An authentication mechanism is used which will not reveal
- information to the server which could be used to compromise future
- connections.
- URLs which do not include a user name must be treated with extra
- care, since they are more likely to compromise the user's primary
- account. A URL containing ";AUTH=*" must also be treated with
- extra care since it might fall back on a weaker security mechanism.
- Finally, clients are discouraged from using a plain text password
- as a fallback with ";AUTH=*" unless the connection has strong
- encryption (e.g. a key length of greater than 56 bits).
- A program interpreting IMAP URLs MAY cache open connections to an
- IMAP server for later re-use. If a URL contains a user name, only
- connections authenticated as that user may be re-used. If a URL
- does not contain a user name or authentication mechanism, then only
- an anonymous connection may be re-used. If a URL contains an
- authentication mechanism without a user name, then any non-
- anonymous connection may be re-used.
- Note that if unsafe or reserved characters such as " " or ";" are
- present in the user name or authentication mechanism, they MUST be
- encoded as described in RFC 1738 [BASIC-URL].
- Newman Standards Track [Page 3]
- RFC 2192 IMAP URL Scheme September 1997
- 4. IMAP server
- An IMAP URL referring to an IMAP server has the following form:
- imap://<iserver>/
- A program interpreting this URL would issue the standard set of
- commands it uses to present a view of the contents of an IMAP
- server. This is likely to be semantically equivalent to one of the
- following URLs:
- imap://<iserver>/;TYPE=LIST
- imap://<iserver>/;TYPE=LSUB
- The program interpreting this URL SHOULD use the LSUB form if it
- supports mailbox subscriptions.
- 5. Lists of mailboxes
- An IMAP URL referring to a list of mailboxes has the following
- form:
- imap://<iserver>/<enc_list_mailbox>;TYPE=<list_type>
- The <list_type> may be either "LIST" or "LSUB", and is case
- insensitive. The field ";TYPE=<list_type>" MUST be included.
- The <enc_list_mailbox> is any argument suitable for the
- list_mailbox field of the IMAP [IMAP4] LIST or LSUB commands. The
- field <enc_list_mailbox> may be omitted, in which case the program
- interpreting the IMAP URL may use "*" or "%" as the
- <enc_list_mailbox>. The program SHOULD use "%" if it supports a
- hierarchical view, otherwise it SHOULD use "*".
- Note that if unsafe or reserved characters such as " " or "%" are
- present in <enc_list_mailbox> they MUST be encoded as described in
- RFC 1738 [BASIC-URL]. If the character "/" is present in
- enc_list_mailbox, it SHOULD NOT be encoded.
- 6. Lists of messages
- An IMAP URL referring to a list of messages has the following form:
- imap://<iserver>/<enc_mailbox>[uidvalidity][?<enc_search>]
- Newman Standards Track [Page 4]
- RFC 2192 IMAP URL Scheme September 1997
- The <enc_mailbox> field is used as the argument to the IMAP4
- "SELECT" command. Note that if unsafe or reserved characters such
- as " ", ";", or "?" are present in <enc_mailbox> they MUST be
- encoded as described in RFC 1738 [BASIC-URL]. If the character "/"
- is present in enc_mailbox, it SHOULD NOT be encoded.
- The [uidvalidity] field is optional. If it is present, it MUST be
- the argument to the IMAP4 UIDVALIDITY status response at the time
- the URL was created. This SHOULD be used by the program
- interpreting the IMAP URL to determine if the URL is stale.
- The [?<enc_search>] field is optional. If it is not present, the
- contents of the mailbox SHOULD be presented by the program
- interpreting the URL. If it is present, it SHOULD be used as the
- arguments following an IMAP4 SEARCH command with unsafe characters
- such as " " (which are likely to be present in the <enc_search>)
- encoded as described in RFC 1738 [BASIC-URL].
- 7. A specific message or message part
- An IMAP URL referring to a specific message or message part has the
- following form:
- imap://<iserver>/<enc_mailbox>[uidvalidity]<iuid>[isection]
- The <enc_mailbox> and [uidvalidity] are as defined above.
- If [uidvalidity] is present in this form, it SHOULD be used by the
- program interpreting the URL to determine if the URL is stale.
- The <iuid> refers to an IMAP4 message UID, and SHOULD be used as
- the <set> argument to the IMAP4 "UID FETCH" command.
- The [isection] field is optional. If not present, the URL refers
- to the entire Internet message as returned by the IMAP command "UID
- FETCH <uid> BODY.PEEK[]". If present, the URL refers to the object
- returned by a "UID FETCH <uid> BODY.PEEK[<section>]" command. The
- type of the object may be determined with a "UID FETCH <uid>
- BODYSTRUCTURE" command and locating the appropriate part in the
- resulting BODYSTRUCTURE. Note that unsafe characters in [isection]
- MUST be encoded as described in [BASIC-URL].
- Newman Standards Track [Page 5]
- RFC 2192 IMAP URL Scheme September 1997
- 8. Relative IMAP URLs
- Relative IMAP URLs are permitted and are resolved according to the
- rules defined in RFC 1808 [REL-URL] with one exception. In IMAP
- URLs, parameters are treated as part of the normal path with
- respect to relative URL resolution. This is believed to be the
- behavior of the installed base and is likely to be documented in a
- future revision of the relative URL specification.
- The following observations are also important:
- The <iauth> grammar element is considered part of the user name for
- purposes of resolving relative IMAP URLs. This means that unless a
- new login/server specification is included in the relative URL, the
- authentication mechanism is inherited from a base IMAP URL.
- URLs always use "/" as the hierarchy delimiter for the purpose of
- resolving paths in relative URLs. IMAP4 permits the use of any
- hierarchy delimiter in mailbox names. For this reason, relative
- mailbox paths will only work if the mailbox uses "/" as the
- hierarchy delimiter. Relative URLs may be used on mailboxes which
- use other delimiters, but in that case, the entire mailbox name
- MUST be specified in the relative URL or inherited as a whole from
- the base URL.
- The base URL for a list of mailboxes or messages which was referred
- to by an IMAP URL is always the referring IMAP URL itself. The
- base URL for a message or message part which was referred to by an
- IMAP URL may be more complicated to determine. The program
- interpreting the relative URL will have to check the headers of the
- MIME entity and any enclosing MIME entities in order to locate the
- "Content-Base" and "Content-Location" headers. These headers are
- used to determine the base URL as defined in [HTTP]. For example,
- if the referring IMAP URL contains a "/;SECTION=1.2" parameter,
- then the MIME headers for section 1.2, for section 1, and for the
- enclosing message itself SHOULD be checked in that order for
- "Content-Base" or "Content-Location" headers.
- 9. Multinational Considerations
- IMAP4 [IMAP4] section 5.1.3 includes a convention for encoding
- non-US-ASCII characters in IMAP mailbox names. Because this
- convention is private to IMAP, it is necessary to convert IMAP's
- encoding to one that can be more easily interpreted by a URL
- display program. For this reason, IMAP's modified UTF-7 encoding
- for mailboxes MUST be converted to UTF-8 [UTF8]. Since 8-bit
- characters are not permitted in URLs, the UTF-8 characters are
- Newman Standards Track [Page 6]
- RFC 2192 IMAP URL Scheme September 1997
- encoded as required by the URL specification [BASIC-URL]. Sample
- code is included in Appendix A to demonstrate this conversion.
- 10. Examples
- The following examples demonstrate how an IMAP4 client program
- might translate various IMAP4 URLs into a series of IMAP4 commands.
- Commands sent from the client to the server are prefixed with "C:",
- and responses sent from the server to the client are prefixed with
- "S:".
- The URL:
- <imap://minbari.org/gray-council;UIDVALIDITY=385759045/;UID=20>
- Results in the following client commands:
- <connect to minbari.org, port 143>
- C: A001 LOGIN ANONYMOUS sheridan@babylon5.org
- C: A002 SELECT gray-council
- <client verifies the UIDVALIDITY matches>
- C: A003 UID FETCH 20 BODY.PEEK[]
- The URL:
- <imap://michael@minbari.org/users.*;type=list>
- Results in the following client commands:
- <client requests password from user>
- <connect to minbari.org imap server, activate strong encryption>
- C: A001 LOGIN MICHAEL zipper
- C: A002 LIST "" users.*
- The URL:
- <imap://psicorp.org/~peter/%E6%97%A5%E6%9C%AC%E8%AA%9E/
- %E5%8F%B0%E5%8C%97>
- Results in the following client commands:
- <connect to psicorp.org, port 143>
- C: A001 LOGIN ANONYMOUS bester@psycop.psicorp.org
- C: A002 SELECT ~peter/&ZeVnLIqe-/&U,BTFw-
- <commands the client uses for viewing the contents of a mailbox>
- Newman Standards Track [Page 7]
- RFC 2192 IMAP URL Scheme September 1997
- The URL:
- <imap://;AUTH=KERBEROS_V4@minbari.org/gray-council/;uid=20/
- ;section=1.2>
- Results in the following client commands:
- <connect to minbari.org, port 143>
- C: A001 AUTHENTICATE KERBEROS_V4
- <authentication exchange>
- C: A002 SELECT gray-council
- C: A003 UID FETCH 20 BODY.PEEK[1.2]
- If the following relative URL is located in that body part:
- <;section=1.4>
- This could result in the following client commands:
- C: A004 UID FETCH 20 (BODY.PEEK[1.2.MIME]
- BODY.PEEK[1.MIME]
- BODY.PEEK[HEADER.FIELDS (Content-Base Content-Location)])
- <Client looks for Content-Base or Content-Location headers in
- result. If no such headers, then it does the following>
- C: A005 UID FETCH 20 BODY.PEEK[1.4]
- The URL:
- <imap://;AUTH=*@minbari.org/gray%20council?SUBJECT%20shadows>
- Could result in the following:
- <connect to minbari.org, port 143>
- C: A001 CAPABILITY
- S: * CAPABILITY IMAP4rev1 AUTH=GSSAPI
- S: A001 OK
- C: A002 AUTHENTICATE GSSAPI
- <authentication exchange>
- S: A002 OK user lennier authenticated
- C: A003 SELECT "gray council"
- ...
- C: A004 SEARCH SUBJECT shadows
- S: * SEARCH 8 10 13 14 15 16
- S: A004 OK SEARCH completed
- C: A005 FETCH 8,10,13:16 ALL
- ...
- Newman Standards Track [Page 8]
- RFC 2192 IMAP URL Scheme September 1997
- NOTE: In this final example, the client has implementation
- dependent choices. The authentication mechanism could be anything,
- including PREAUTH. And the final FETCH command could fetch more or
- less information about the messages, depending on what it wishes to
- display to the user.
- 11. Security Considerations
- Security considerations discussed in the IMAP specification [IMAP4]
- and the URL specification [BASIC-URL] are relevant. Security
- considerations related to authenticated URLs are discussed in
- section 3 of this document.
- Many email clients store the plain text password for later use
- after logging into an IMAP server. Such clients MUST NOT use a
- stored password in response to an IMAP URL without explicit
- permission from the user to supply that password to the specified
- host name.
- 12. ABNF for IMAP URL scheme
- This uses ABNF as defined in RFC 822 [IMAIL]. Terminals from the
- BNF for IMAP [IMAP4] and URLs [BASIC-URL] are also used. Strings
- are not case sensitive and free insertion of linear-white-space is
- not permitted.
- achar = uchar / "&" / "=" / "~"
- ; see [BASIC-URL] for "uchar" definition
- bchar = achar / ":" / "@" / "/"
- enc_auth_type = 1*achar
- ; encoded version of [IMAP-AUTH] "auth_type"
- enc_list_mailbox = 1*bchar
- ; encoded version of [IMAP4] "list_mailbox"
- enc_mailbox = 1*bchar
- ; encoded version of [IMAP4] "mailbox"
- enc_search = 1*bchar
- ; encoded version of search_program below
- enc_section = 1*bchar
- ; encoded version of section below
- Newman Standards Track [Page 9]
- RFC 2192 IMAP URL Scheme September 1997
- enc_user = 1*achar
- ; encoded version of [IMAP4] "userid"
- imapurl = "imap://" iserver "/" [ icommand ]
- iauth = ";AUTH=" ( "*" / enc_auth_type )
- icommand = imailboxlist / imessagelist / imessagepart
- imailboxlist = [enc_list_mailbox] ";TYPE=" list_type
- imessagelist = enc_mailbox [ "?" enc_search ] [uidvalidity]
- imessagepart = enc_mailbox [uidvalidity] iuid [isection]
- isection = "/;SECTION=" enc_section
- iserver = [iuserauth "@"] hostport
- ; See [BASIC-URL] for "hostport" definition
- iuid = "/;UID=" nz_number
- ; See [IMAP4] for "nz_number" definition
- iuserauth = enc_user [iauth] / [enc_user] iauth
- list_type = "LIST" / "LSUB"
- search_program = ["CHARSET" SPACE astring SPACE]
- search_key *(SPACE search_key)
- ; IMAP4 literals may not be used
- ; See [IMAP4] for "astring" and "search_key"
- section = section_text / (nz_number *["." nz_number]
- ["." (section_text / "MIME")])
- ; See [IMAP4] for "section_text" and "nz_number"
- uidvalidity = ";UIDVALIDITY=" nz_number
- ; See [IMAP4] for "nz_number" definition
- 13. References
- [BASIC-URL] Berners-Lee, Masinter, McCahill, "Uniform Resource
- Locators (URL)", RFC 1738, CERN, Xerox Corporation, University of
- Minnesota, December 1994.
- <ftp://ds.internic.net/rfc/rfc1738.txt>
- Newman Standards Track [Page 10]
- RFC 2192 IMAP URL Scheme September 1997
- [IMAP4] Crispin, M., "Internet Message Access Protocol - Version
- 4rev1", RFC 2060, University of Washington, December 1996.
- <ftp://ds.internic.net/rfc/rfc2060.txt>
- [IMAP-AUTH] Myers, J., "IMAP4 Authentication Mechanism", RFC 1731,
- Carnegie-Mellon University, December 1994.
- <ftp://ds.internic.net/rfc/rfc1731.txt>
- [HTTP] Fielding, Gettys, Mogul, Frystyk, Berners-Lee, "Hypertext
- Transfer Protocol -- HTTP/1.1", RFC 2068, UC Irvine, DEC, MIT/LCS,
- January 1997.
- <ftp://ds.internic.net/rfc/rfc2068.txt>
- [IMAIL] Crocker, "Standard for the Format of ARPA Internet Text
- Messages", STD 11, RFC 822, University of Delaware, August 1982.
- <ftp://ds.internic.net/rfc/rfc822.txt>
- [KEYWORDS] Bradner, "Key words for use in RFCs to Indicate
- Requirement Levels", RFC 2119, Harvard University, March 1997.
- <ftp://ds.internic.net/rfc/rfc2119.txt>
- [MIME] Freed, N., Borenstein, N., "Multipurpose Internet Mail
- Extensions", RFC 2045, Innosoft, First Virtual, November 1996.
- <ftp://ds.internic.net/rfc/rfc2045.txt>
- [REL-URL] Fielding, "Relative Uniform Resource Locators", RFC 1808,
- UC Irvine, June 1995.
- <ftp://ds.internic.net/rfc/rfc1808.txt>
- [UTF8] Yergeau, F. "UTF-8, a transformation format of Unicode and
- ISO 10646", RFC 2044, Alis Technologies, October 1996.
- <ftp://ds.internic.net/rfc/rfc2044.txt>
- 14. Author's Address
- Chris Newman
- Innosoft International, Inc.
- 1050 Lakes Drive
- West Covina, CA 91790 USA
- EMail: chris.newman@innosoft.com
- Newman Standards Track [Page 11]
- RFC 2192 IMAP URL Scheme September 1997
- Appendix A. Sample code
- Here is sample C source code to convert between URL paths and IMAP
- mailbox names, taking into account mapping between IMAP's modified UTF-7
- [IMAP4] and hex-encoded UTF-8 which is more appropriate for URLs. This
- code has not been rigorously tested nor does it necessarily behave
- reasonably with invalid input, but it should serve as a useful example.
- This code just converts the mailbox portion of the URL and does not deal
- with parameters, query or server components of the URL.
- #include <stdio.h>
- #include <string.h>
- /* hexadecimal lookup table */
- static char hex[] = "0123456789ABCDEF";
- /* URL unsafe printable characters */
- static char urlunsafe[] = " \"#%&+:;<=>?@[\\]^`{|}";
- /* UTF7 modified base64 alphabet */
- static char base64chars[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
- #define UNDEFINED 64
- /* UTF16 definitions */
- #define UTF16MASK 0x03FFUL
- #define UTF16SHIFT 10
- #define UTF16BASE 0x10000UL
- #define UTF16HIGHSTART 0xD800UL
- #define UTF16HIGHEND 0xDBFFUL
- #define UTF16LOSTART 0xDC00UL
- #define UTF16LOEND 0xDFFFUL
- /* Convert an IMAP mailbox to a URL path
- * dst needs to have roughly 4 times the storage space of src
- * Hex encoding can triple the size of the input
- * UTF-7 can be slightly denser than UTF-8
- * (worst case: 8 octets UTF-7 becomes 9 octets UTF-8)
- */
- void MailboxToURL(char *dst, char *src)
- {
- unsigned char c, i, bitcount;
- unsigned long ucs4, utf16, bitbuf;
- unsigned char base64[256], utf8[6];
- Newman Standards Track [Page 12]
- RFC 2192 IMAP URL Scheme September 1997
- /* initialize modified base64 decoding table */
- memset(base64, UNDEFINED, sizeof (base64));
- for (i = 0; i < sizeof (base64chars); ++i) {
- base64[base64chars[i]] = i;
- }
- /* loop until end of string */
- while (*src != '\0') {
- c = *src++;
- /* deal with literal characters and &- */
- if (c != '&' || *src == '-') {
- if (c < ' ' || c > '~' || strchr(urlunsafe, c) != NULL) {
- /* hex encode if necessary */
- dst[0] = '%';
- dst[1] = hex[c >> 4];
- dst[2] = hex[c & 0x0f];
- dst += 3;
- } else {
- /* encode literally */
- *dst++ = c;
- }
- /* skip over the '-' if this is an &- sequence */
- if (c == '&') ++src;
- } else {
- /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
- bitbuf = 0;
- bitcount = 0;
- ucs4 = 0;
- while ((c = base64[(unsigned char) *src]) != UNDEFINED) {
- ++src;
- bitbuf = (bitbuf << 6) | c;
- bitcount += 6;
- /* enough bits for a UTF-16 character? */
- if (bitcount >= 16) {
- bitcount -= 16;
- utf16 = (bitcount ? bitbuf >> bitcount
- : bitbuf) & 0xffff;
- /* convert UTF16 to UCS4 */
- if
- (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND) {
- ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;
- continue;
- } else if
- (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND) {
- ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
- } else {
- ucs4 = utf16;
- }
- Newman Standards Track [Page 13]
- RFC 2192 IMAP URL Scheme September 1997
- /* convert UTF-16 range of UCS4 to UTF-8 */
- if (ucs4 <= 0x7fUL) {
- utf8[0] = ucs4;
- i = 1;
- } else if (ucs4 <= 0x7ffUL) {
- utf8[0] = 0xc0 | (ucs4 >> 6);
- utf8[1] = 0x80 | (ucs4 & 0x3f);
- i = 2;
- } else if (ucs4 <= 0xffffUL) {
- utf8[0] = 0xe0 | (ucs4 >> 12);
- utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
- utf8[2] = 0x80 | (ucs4 & 0x3f);
- i = 3;
- } else {
- utf8[0] = 0xf0 | (ucs4 >> 18);
- utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
- utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
- utf8[3] = 0x80 | (ucs4 & 0x3f);
- i = 4;
- }
- /* convert utf8 to hex */
- for (c = 0; c < i; ++c) {
- dst[0] = '%';
- dst[1] = hex[utf8[c] >> 4];
- dst[2] = hex[utf8[c] & 0x0f];
- dst += 3;
- }
- }
- }
- /* skip over trailing '-' in modified UTF-7 encoding */
- if (*src == '-') ++src;
- }
- }
- /* terminate destination string */
- *dst = '\0';
- }
- /* Convert hex coded UTF-8 URL path to modified UTF-7 IMAP mailbox
- * dst should be about twice the length of src to deal with non-hex
- * coded URLs
- */
- void URLtoMailbox(char *dst, char *src)
- {
- unsigned int utf8pos, utf8total, i, c, utf7mode, bitstogo, utf16flag;
- unsigned long ucs4, bitbuf;
- unsigned char hextab[256];
- /* initialize hex lookup table */
- Newman Standards Track [Page 14]
- RFC 2192 IMAP URL Scheme September 1997
- memset(hextab, 0, sizeof (hextab));
- for (i = 0; i < sizeof (hex); ++i) {
- hextab[hex[i]] = i;
- if (isupper(hex[i])) hextab[tolower(hex[i])] = i;
- }
- utf7mode = 0;
- utf8total = 0;
- bitstogo = 0;
- while ((c = *src) != '\0') {
- ++src;
- /* undo hex-encoding */
- if (c == '%' && src[0] != '\0' && src[1] != '\0') {
- c = (hextab[src[0]] << 4) | hextab[src[1]];
- src += 2;
- }
- /* normal character? */
- if (c >= ' ' && c <= '~') {
- /* switch out of UTF-7 mode */
- if (utf7mode) {
- if (bitstogo) {
- *dst++ = base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
- }
- *dst++ = '-';
- utf7mode = 0;
- }
- *dst++ = c;
- /* encode '&' as '&-' */
- if (c == '&') {
- *dst++ = '-';
- }
- continue;
- }
- /* switch to UTF-7 mode */
- if (!utf7mode) {
- *dst++ = '&';
- utf7mode = 1;
- }
- /* Encode US-ASCII characters as themselves */
- if (c < 0x80) {
- ucs4 = c;
- utf8total = 1;
- } else if (utf8total) {
- /* save UTF8 bits into UCS4 */
- ucs4 = (ucs4 << 6) | (c & 0x3FUL);
- if (++utf8pos < utf8total) {
- continue;
- }
- Newman Standards Track [Page 15]
- RFC 2192 IMAP URL Scheme September 1997
- } else {
- utf8pos = 1;
- if (c < 0xE0) {
- utf8total = 2;
- ucs4 = c & 0x1F;
- } else if (c < 0xF0) {
- utf8total = 3;
- ucs4 = c & 0x0F;
- } else {
- /* NOTE: can't convert UTF8 sequences longer than 4 */
- utf8total = 4;
- ucs4 = c & 0x03;
- }
- continue;
- }
- /* loop to split ucs4 into two utf16 chars if necessary */
- utf8total = 0;
- do {
- if (ucs4 >= UTF16BASE) {
- ucs4 -= UTF16BASE;
- bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT)
- + UTF16HIGHSTART);
- ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
- utf16flag = 1;
- } else {
- bitbuf = (bitbuf << 16) | ucs4;
- utf16flag = 0;
- }
- bitstogo += 16;
- /* spew out base64 */
- while (bitstogo >= 6) {
- bitstogo -= 6;
- *dst++ = base64chars[(bitstogo ? (bitbuf >> bitstogo)
- : bitbuf)
- & 0x3F];
- }
- } while (utf16flag);
- }
- /* if in UTF-7 mode, finish in ASCII */
- if (utf7mode) {
- if (bitstogo) {
- *dst++ = base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
- }
- *dst++ = '-';
- }
- /* tie off string */
- *dst = '\0';
- }
- Newman Standards Track [Page 16]
|