Line data Source code
1 : /* idna.c - implementation of high-level IDNA processing function
2 : Copyright (C) 2011-2024 Simon Josefsson
3 :
4 : Libidn2 is free software: you can redistribute it and/or modify it
5 : under the terms of either:
6 :
7 : * the GNU Lesser General Public License as published by the Free
8 : Software Foundation; either version 3 of the License, or (at
9 : your option) any later version.
10 :
11 : or
12 :
13 : * the GNU General Public License as published by the Free
14 : Software Foundation; either version 2 of the License, or (at
15 : your option) any later version.
16 :
17 : or both in parallel, as here.
18 :
19 : This program is distributed in the hope that it will be useful,
20 : but WITHOUT ANY WARRANTY; without even the implied warranty of
21 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 : GNU General Public License for more details.
23 :
24 : You should have received copies of the GNU General Public License and
25 : the GNU Lesser General Public License along with this program. If
26 : not, see <http://www.gnu.org/licenses/>.
27 : */
28 :
29 : #include <config.h>
30 :
31 : #include <stdlib.h> /* free */
32 : #include <errno.h> /* errno */
33 :
34 : #include "idn2.h"
35 : #include "bidi.h"
36 : #include "tables.h"
37 : #include "context.h"
38 : #include "tr46map.h"
39 :
40 : #include <unitypes.h>
41 : #include <unictype.h> /* uc_is_general_category, UC_CATEGORY_M */
42 : #include <uninorm.h> /* u32_normalize */
43 : #include <unistr.h> /* u8_to_u32 */
44 :
45 : #include "idna.h"
46 :
47 : /*
48 : * NFC Quick Check from
49 : * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
50 : *
51 : * They say, this is much faster than 'brute force' normalization.
52 : * Strings are very likely already in NFC form.
53 : */
54 : G_GNUC_IDN2_ATTRIBUTE_PURE static int
55 145983 : _isNFC (uint32_t *label, size_t len)
56 : {
57 145983 : int lastCanonicalClass = 0;
58 145983 : int result = 1;
59 : size_t it;
60 :
61 299607 : for (it = 0; it < len; it++)
62 : {
63 153828 : uint32_t ch = label[it];
64 :
65 : // supplementary code point
66 153828 : if (ch >= 0x10000)
67 90614 : it++;
68 :
69 153828 : int canonicalClass = uc_combining_class (ch);
70 153828 : if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
71 0 : return 0;
72 :
73 153828 : NFCQCMap *map = get_nfcqc_map (ch);
74 153828 : if (map)
75 : {
76 204 : if (map->check)
77 204 : return 0;
78 0 : result = -1;
79 : }
80 :
81 153624 : lastCanonicalClass = canonicalClass;
82 : }
83 :
84 145779 : return result;
85 : }
86 :
87 : int
88 1703089 : _idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
89 : uint32_t **out, size_t *outlen, int nfc)
90 : {
91 : uint32_t *p;
92 : size_t plen;
93 :
94 1703089 : p = u8_to_u32 (src, srclen, NULL, &plen);
95 1703089 : if (p == NULL)
96 : {
97 1 : if (errno == ENOMEM)
98 0 : return IDN2_MALLOC;
99 1 : return IDN2_ENCODING_ERROR;
100 : }
101 :
102 1703088 : if (nfc && !_isNFC (p, plen))
103 : {
104 : size_t tmplen;
105 204 : uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
106 204 : free (p);
107 204 : if (tmp == NULL)
108 : {
109 0 : if (errno == ENOMEM)
110 0 : return IDN2_MALLOC;
111 0 : return IDN2_NFC;
112 : }
113 :
114 204 : p = tmp;
115 204 : plen = tmplen;
116 : }
117 :
118 1703088 : *out = p;
119 1703088 : *outlen = plen;
120 1703088 : return IDN2_OK;
121 : }
122 :
/*
 * Report whether the SRCLEN bytes at SRC are all 7-bit values.
 *
 * Returns false as soon as a byte with the high bit set (>= 0x80) is
 * seen, true otherwise -- including for an empty range.
 */
bool
_idn2_ascii_p (const uint8_t *src, size_t srclen)
{
  size_t pos = 0;

  while (pos != srclen)
    {
      /* The top bit is set exactly for byte values 0x80..0xFF.  */
      if ((src[pos] & 0x80) != 0)
        return false;
      pos++;
    }

  return true;
}
134 :
135 : int
136 2183756 : _idn2_label_test (int what, const uint32_t *label, size_t llen)
137 : {
138 2183756 : if (what & TEST_NFC)
139 : {
140 : size_t plen;
141 2183756 : uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
142 : NULL, &plen);
143 : int ok;
144 2183756 : if (p == NULL)
145 : {
146 0 : if (errno == ENOMEM)
147 1264 : return IDN2_MALLOC;
148 0 : return IDN2_NFC;
149 : }
150 2183756 : ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
151 2183756 : free (p);
152 2183756 : if (!ok)
153 1264 : return IDN2_NOT_NFC;
154 : }
155 :
156 2182492 : if (what & TEST_2HYPHEN)
157 : {
158 2182492 : if (llen >= 4 && label[2] == '-' && label[3] == '-')
159 583 : return IDN2_2HYPHEN;
160 : }
161 :
162 2181909 : if (what & TEST_HYPHEN_STARTEND)
163 : {
164 629636 : if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
165 2729 : return IDN2_HYPHEN_STARTEND;
166 : }
167 :
168 2179180 : if (what & TEST_LEADING_COMBINING)
169 : {
170 2179180 : if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
171 13461 : return IDN2_LEADING_COMBINING;
172 : }
173 :
174 2165719 : if (what & TEST_DISALLOWED)
175 : {
176 : size_t i;
177 2964718 : for (i = 0; i < llen; i++)
178 1593941 : if (_idn2_disallowed_p (label[i]))
179 : {
180 184063 : if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
181 32259 : (what & TEST_ALLOW_STD3_DISALLOWED))
182 : {
183 : IDNAMap map;
184 32243 : get_idna_map (label[i], &map);
185 32243 : if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
186 27613 : map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
187 4643 : continue;
188 :
189 : }
190 :
191 179420 : return IDN2_DISALLOWED;
192 : }
193 : }
194 :
195 1986299 : if (what & TEST_CONTEXTJ)
196 : {
197 : size_t i;
198 0 : for (i = 0; i < llen; i++)
199 0 : if (_idn2_contextj_p (label[i]))
200 0 : return IDN2_CONTEXTJ;
201 : }
202 :
203 1986299 : if (what & TEST_CONTEXTJ_RULE)
204 : {
205 : size_t i;
206 : int rc;
207 :
208 2780065 : for (i = 0; i < llen; i++)
209 : {
210 1409907 : rc = _idn2_contextj_rule (label, llen, i);
211 1409907 : if (rc != IDN2_OK)
212 619 : return rc;
213 : }
214 : }
215 :
216 1985680 : if (what & TEST_CONTEXTO)
217 : {
218 : size_t i;
219 0 : for (i = 0; i < llen; i++)
220 0 : if (_idn2_contexto_p (label[i]))
221 0 : return IDN2_CONTEXTO;
222 : }
223 :
224 1985680 : if (what & TEST_CONTEXTO_WITH_RULE)
225 : {
226 : size_t i;
227 2772353 : for (i = 0; i < llen; i++)
228 1402529 : if (_idn2_contexto_p (label[i])
229 6468 : && !_idn2_contexto_with_rule (label[i]))
230 0 : return IDN2_CONTEXTO_NO_RULE;
231 : }
232 :
233 1985680 : if (what & TEST_CONTEXTO_RULE)
234 : {
235 : size_t i;
236 : int rc;
237 :
238 5201 : for (i = 0; i < llen; i++)
239 : {
240 4918 : rc = _idn2_contexto_rule (label, llen, i);
241 4918 : if (rc != IDN2_OK)
242 51 : return rc;
243 : }
244 : }
245 :
246 1985629 : if (what & TEST_UNASSIGNED)
247 : {
248 : size_t i;
249 1951396 : for (i = 0; i < llen; i++)
250 1406596 : if (_idn2_unassigned_p (label[i]))
251 825307 : return IDN2_UNASSIGNED;
252 : }
253 :
254 1160322 : if (what & TEST_BIDI)
255 : {
256 544800 : int rc = _idn2_bidi (label, llen);
257 544800 : if (rc != IDN2_OK)
258 1059 : return rc;
259 : }
260 :
261 1159263 : if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
262 : {
263 : size_t i;
264 1027568 : int transitional = what & TEST_TRANSITIONAL;
265 :
266 : /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
267 2180849 : for (i = 0; i < llen; i++)
268 1153281 : if (label[i] == 0x002E)
269 0 : return IDN2_DOT_IN_LABEL;
270 :
271 : /* TR46: 6. Each code point in the label must only have certain status
272 : * values according to Section 5, IDNA Mapping Table:
273 : * a. For Transitional Processing, each value must be valid.
274 : * b. For Nontransitional Processing, each value must be either valid or deviation. */
275 2172362 : for (i = 0; i < llen; i++)
276 : {
277 : IDNAMap map;
278 :
279 1148290 : get_idna_map (label[i], &map);
280 :
281 1148290 : if (map_is (&map, TR46_FLG_VALID) ||
282 24917 : (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
283 1144794 : continue;
284 :
285 23407 : if (what & TEST_ALLOW_STD3_DISALLOWED &&
286 23233 : (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
287 3458 : map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
288 19911 : continue;
289 :
290 3496 : return transitional ? IDN2_INVALID_TRANSITIONAL :
291 : IDN2_INVALID_NONTRANSITIONAL;
292 : }
293 : }
294 :
295 1155767 : return IDN2_OK;
296 : }
|