Hoyt's FORK of DemoIccMAX 2.1.17.hoyt
Documentation for Hoyt's FORK of DemoIccMAX
Loading...
Searching...
No Matches
IccConvertUTF.h
Go to the documentation of this file.
1/*
2* Copyright 2001-2004 Unicode, Inc.
3*
4* Disclaimer
5*
6* This source code is provided as is by Unicode, Inc. No claims are
7* made as to fitness for any particular purpose. No warranties of any
8* kind are expressed or implied. The recipient agrees to determine
9* applicability of information provided. If this file has been
10* purchased on magnetic or optical media from Unicode, Inc., the
11* sole remedy for any claim will be exchange of defective media
12* within 90 days of receipt.
13*
14* Limitations on Rights to Redistribute This Code
15*
16* Unicode, Inc. hereby grants the right to freely use the information
17* supplied in this file in the creation of products supporting the
18* Unicode Standard, and to make copies of this file in any form
19* for internal or external distribution as long as this notice
20* remains attached.
21*/
22
23/* ---------------------------------------------------------------------
24
25Conversions between UTF-32, UTF-16, and UTF-8. Header file.
26
27Several functions are included here, forming a complete set of
28conversions between the three formats. UTF-7 is not included
29here, but is handled in a separate source file.
30
31Each of these routines takes pointers to input buffers and output
32buffers. The input buffers are const.
33
34Each routine converts the text between *sourceStart and sourceEnd,
35putting the result into the buffer between *targetStart and
36targetEnd. Note: the end pointers are *after* the last item: e.g.
37*(sourceEnd - 1) is the last item.
38
39The return result indicates whether the conversion was successful,
40and if not, whether the problem was in the source or target buffers.
41(Only the first encountered problem is indicated.)
42
43After the conversion, *sourceStart and *targetStart are both
44updated to point to the end of the last text successfully converted in
45the respective buffers.
46
47Input parameters:
48sourceStart - pointer to a pointer to the source buffer.
49The contents of this are modified on return so that
50it points at the next thing to be converted.
51targetStart - similarly, pointer to pointer to the target buffer.
52sourceEnd, targetEnd - respectively pointers to the ends of the
53two buffers, for overflow checking only.
54
55These conversion functions take an icUtfConversionFlags argument. When this
56flag is set to strict, both irregular sequences and isolated surrogates
57will cause an error. When the flag is set to lenient, both irregular
58sequences and isolated surrogates are converted.
59
60Whether the flag is strict or lenient, all illegal sequences will cause
61an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
62or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
63must check for illegal sequences.
64
65When the flag is set to lenient, characters over 0x10FFFF are converted
66to the replacement character; otherwise (when the flag is set to strict)
67they constitute an error.
68
69Output parameters:
70The value "sourceIllegal" is returned from some routines if the input
71sequence is malformed. When "sourceIllegal" is returned, the source
72value will point to the illegal value that caused the problem. E.g.,
73in UTF-8 when a sequence is malformed, it points to the start of the
74malformed sequence.
75
76Author: Mark E. Davis, 1994.
77Rev History: Rick McGowan, fixes & updates May 2001.
78Fixes & updates, Sept 2001.
79
80------------------------------------------------------------------------ */
81
82/* ---------------------------------------------------------------------
83July 2009
84- Modified names to avoid possible conflicts - Max Derhak
85- Added IccProfLibConf.h include to use ICCPROFLIB_API with functions
86- Changed typedef of UTF32 to use ICCUINT32
87------------------------------------------------------------------------ */
88
89/* ---------------------------------------------------------------------
90November 2011
91- Added copies of functions that place results in a referenced std::vector,
92 thus releasing the caller from the responsibility of allocating a buffer
93 of the correct size - Max Derhak
94------------------------------------------------------------------------ */
95
96#include "IccProfLibConf.h"
97
98/* ---------------------------------------------------------------------
99The following 4 definitions are compiler-specific.
100The C standard does not guarantee that wchar_t has at least
101 16 bits, so wchar_t is no less portable than unsigned short!
102All should be unsigned values to avoid sign extension during
103bit mask & shift operations.
104------------------------------------------------------------------------ */
105
106typedef ICCUINT32 UTF32; /* at least 32 bits */
107typedef unsigned short UTF16; /* at least 16 bits */
108typedef unsigned char UTF8; /* typically 8 bits */
109typedef unsigned char Boolean; /* 0 or 1 */
110
111/* Some fundamental constants */
112#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
113#define UNI_MAX_BMP (UTF32)0x0000FFFF
114#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
115#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
116#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
117
118typedef enum {
119 conversionOK, /* conversion successful */
120 sourceExhausted, /* partial character in source, but hit end */
121 targetExhausted, /* insuff. room in target for conversion */
122 sourceIllegal /* source sequence is illegal/malformed */
124
129
130/* This is for C++ and does no harm in C */
131#ifdef __cplusplus
132extern "C" {
133#endif
134
136 const UTF8** sourceStart, const UTF8* sourceEnd,
137 UTF16** targetStart, UTF16* targetEnd, icUtfConversionFlags flags);
138
140 const UTF16** sourceStart, const UTF16* sourceEnd,
141 UTF8** targetStart, UTF8* targetEnd, icUtfConversionFlags flags);
142
144 const UTF8** sourceStart, const UTF8* sourceEnd,
145 UTF32** targetStart, UTF32* targetEnd, icUtfConversionFlags flags);
146
148 const UTF32** sourceStart, const UTF32* sourceEnd,
149 UTF8** targetStart, UTF8* targetEnd, icUtfConversionFlags flags);
150
152 const UTF16** sourceStart, const UTF16* sourceEnd,
153 UTF32** targetStart, UTF32* targetEnd, icUtfConversionFlags flags);
154
156 const UTF32** sourceStart, const UTF32* sourceEnd,
157 UTF16** targetStart, UTF16* targetEnd, icUtfConversionFlags flags);
158
159Boolean ICCPROFLIB_API icIsLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
160
161#ifdef __cplusplus
162}
163#include <vector>
164typedef std::vector<UTF8> icUtf8Vector;
165typedef std::vector<UTF16> icUtf16Vector;
166typedef std::vector<UTF32> icUtf32Vector;
167
169 const UTF8* sourceStart, const UTF8* sourceEnd,
170 icUtf16Vector &target, icUtfConversionFlags flags);
171
173 const UTF16* sourceStart, const UTF16* sourceEnd,
174 icUtf8Vector &target, icUtfConversionFlags flags);
175
177 const UTF8* sourceStart, const UTF8* sourceEnd,
178 icUtf32Vector &target, icUtfConversionFlags flags);
179
181 const UTF32* sourceStart, const UTF32* sourceEnd,
182 icUtf8Vector &target, icUtfConversionFlags flags);
183
185 const UTF16* sourceStart, const UTF16* sourceEnd,
186 icUtf32Vector &target, icUtfConversionFlags flags);
187
189 const UTF32* sourceStart, const UTF32* sourceEnd,
190 icUtf16Vector &target, icUtfConversionFlags flags);
191
192#endif
193
194/* --------------------------------------------------------------------- */
Boolean icIsLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
uint32_t UTF32
icUtfConversionResult icConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, icUtfConversionFlags flags)
unsigned short UTF16
icUtfConversionResult icConvertUTF16toUTF32(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, icUtfConversionFlags flags)
icUtfConversionResult icConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, icUtfConversionFlags flags)
icUtfConversionResult
@ targetExhausted
@ sourceIllegal
@ conversionOK
@ sourceExhausted
icUtfConversionResult icConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, icUtfConversionFlags flags)
icUtfConversionResult icConvertUTF32toUTF16(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, icUtfConversionFlags flags)
icUtfConversionResult icConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, icUtfConversionFlags flags)
unsigned char UTF8
unsigned char Boolean
icUtfConversionFlags
@ strictConversion
@ lenientConversion
File: IccProfLibConf.h.
#define ICCPROFLIB_API
#define ICCUINT32