LCOV - code coverage report
Current view: top level - builds/libidn/libidn2/lib - lookup.c (source / functions) Hit Total Coverage
Test: Libidn2-2.3.7.3-61ee Code Coverage Lines: 281 321 87.5 %
Date: 2024-12-01 07:08:19 Functions: 12 12 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* lookup.c - implementation of IDNA2008 lookup functions
       2             :    Copyright (C) 2011-2024 Simon Josefsson
       3             :    Copyright (C) 2017-2024 Tim Ruehsen
       4             : 
       5             :    Libidn2 is free software: you can redistribute it and/or modify it
       6             :    under the terms of either:
       7             : 
       8             :      * the GNU Lesser General Public License as published by the Free
       9             :        Software Foundation; either version 3 of the License, or (at
      10             :        your option) any later version.
      11             : 
      12             :    or
      13             : 
      14             :      * the GNU General Public License as published by the Free
      15             :        Software Foundation; either version 2 of the License, or (at
      16             :        your option) any later version.
      17             : 
      18             :    or both in parallel, as here.
      19             : 
      20             :    This program is distributed in the hope that it will be useful,
      21             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      22             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      23             :    GNU General Public License for more details.
      24             : 
      25             :    You should have received copies of the GNU General Public License and
      26             :    the GNU Lesser General Public License along with this program.  If
      27             :    not, see <http://www.gnu.org/licenses/>.
      28             : */
      29             : 
      30             : #include <config.h>
      31             : 
      32             : #include "idn2.h"
      33             : 
      34             : #include <errno.h>                /* errno */
      35             : #include <stdlib.h>               /* malloc, free */
      36             : 
      37             : #include <unitypes.h>
      38             : #include <uniconv.h>              /* u8_strconv_from_locale */
      39             : #include <uninorm.h>              /* u32_normalize */
      40             : #include <unistr.h>               /* u8_to_u32 */
      41             : 
      42             : #include "idna.h"             /* _idn2_label_test */
      43             : #include "tr46map.h"          /* definition for tr46map.c */
      44             : 
      45             : #ifdef HAVE_LIBUNISTRING
      46             : /* copied from gnulib */
      47             : # include <limits.h>
      48             : # define _C_CTYPE_LOWER_N(N) \
      49             :    case 'a' + (N): case 'b' + (N): case 'c' + (N): case 'd' + (N): \
      50             :    case 'e' + (N): case 'f' + (N): \
      51             :    case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
      52             :    case 'k' + (N): case 'l' + (N): case 'm' + (N): case 'n' + (N): \
      53             :    case 'o' + (N): case 'p' + (N): case 'q' + (N): case 'r' + (N): \
      54             :    case 's' + (N): case 't' + (N): case 'u' + (N): case 'v' + (N): \
      55             :    case 'w' + (N): case 'x' + (N): case 'y' + (N): case 'z' + (N)
      56             : # define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')
      57             : static inline int
      58       25146 : c_tolower (int c)
      59             : {
      60       25146 :   switch (c)
      61             :     {
      62           0 :     _C_CTYPE_UPPER:
      63           0 :       return c - 'A' + 'a';
      64       25146 :     default:
      65       25146 :       return c;
      66             :     }
      67             : }
      68             : 
      69             : static int
      70        1111 : c_strncasecmp (const char *s1, const char *s2, size_t n)
      71             : {
      72        1111 :   register const unsigned char *p1 = (const unsigned char *) s1;
      73        1111 :   register const unsigned char *p2 = (const unsigned char *) s2;
      74             :   unsigned char c1, c2;
      75             : 
      76        1111 :   if (p1 == p2 || n == 0)
      77           0 :     return 0;
      78             : 
      79             :   do
      80             :     {
      81       12573 :       c1 = c_tolower (*p1);
      82       12573 :       c2 = c_tolower (*p2);
      83             : 
      84       12573 :       if (--n == 0 || c1 == '\0')
      85             :         break;
      86             : 
      87       11462 :       ++p1;
      88       11462 :       ++p2;
      89             :     }
      90       11462 :   while (c1 == c2);
      91             : 
      92             :   if (UCHAR_MAX <= INT_MAX)
      93        1111 :     return c1 - c2;
      94             :   else
      95             :     /* On machines where 'char' and 'int' are types of the same size, the
      96             :        difference of two 'unsigned char' values - including the sign bit -
      97             :        doesn't fit in an 'int'.  */
      98             :     return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
      99             : }
     100             : #else
     101             : # include <c-strcase.h>
     102             : #endif
     103             : 
     104             : static int
     105     5577587 : set_default_flags (int *flags)
     106             : {
     107     5577587 :   if (((*flags) & IDN2_TRANSITIONAL) && ((*flags) & IDN2_NONTRANSITIONAL))
     108           2 :     return IDN2_INVALID_FLAGS;
     109             : 
     110     5577585 :   if (((*flags) & (IDN2_TRANSITIONAL | IDN2_NONTRANSITIONAL))
     111     2239376 :       && ((*flags) & IDN2_NO_TR46))
     112           2 :     return IDN2_INVALID_FLAGS;
     113             : 
     114     5577583 :   if (((*flags) & IDN2_ALABEL_ROUNDTRIP)
     115           6 :       && ((*flags) & IDN2_NO_ALABEL_ROUNDTRIP))
     116           0 :     return IDN2_INVALID_FLAGS;
     117             : 
     118     5577583 :   if (!((*flags) & (IDN2_NO_TR46 | IDN2_TRANSITIONAL)))
     119     3345617 :     *flags |= IDN2_NONTRANSITIONAL;
     120             : 
     121     5577583 :   return IDN2_OK;
     122             : }
     123             : 
     124             : static int
     125     1714379 : label (const uint8_t *src, size_t srclen, uint8_t *dst, size_t *dstlen,
     126             :        int flags)
     127             : {
     128             :   size_t plen;
     129     1714379 :   uint32_t *p = NULL;
     130     1714379 :   const uint8_t *src_org = NULL;
     131     1714379 :   uint8_t *src_allocated = NULL;
     132     1714379 :   int rc, check_roundtrip = 0;
     133     1714379 :   size_t tmpl, srclen_org = 0;
     134             :   uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
     135     1714379 :   size_t label32_len = IDN2_LABEL_MAX_LENGTH;
     136             : 
     137     1714379 :   if (_idn2_ascii_p (src, srclen))
     138             :     {
     139       13066 :       if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP) && srclen >= 4
     140        2717 :           && memcmp (src, "xn--", 4) == 0)
     141             :         {
     142             :           /*
     143             :              If the input to this procedure appears to be an A-label
     144             :              (i.e., it starts in "xn--", interpreted
     145             :              case-insensitively), the lookup application MAY attempt to
     146             :              convert it to a U-label, first ensuring that the A-label is
     147             :              entirely in lowercase (converting it to lowercase if
     148             :              necessary), and apply the tests of Section 5.4 and the
     149             :              conversion of Section 5.5 to that form. */
     150        1402 :           rc = idn2_punycode_decode ((char *) src + 4, srclen - 4,
     151             :                                      label_u32, &label32_len);
     152        1402 :           if (rc)
     153           0 :             return rc;
     154             : 
     155        1402 :           check_roundtrip = 1;
     156        1402 :           src_org = src;
     157        1402 :           srclen_org = srclen;
     158             : 
     159        1402 :           srclen = IDN2_LABEL_MAX_LENGTH;
     160        1402 :           src = src_allocated =
     161        1402 :             u32_to_u8 (label_u32, label32_len, NULL, &srclen);
     162        1402 :           if (!src)
     163             :             {
     164           0 :               if (errno == ENOMEM)
     165           0 :                 return IDN2_MALLOC;
     166           0 :               return IDN2_ENCODING_ERROR;
     167             :             }
     168             :         }
     169             :       else
     170             :         {
     171       11664 :           if (srclen > IDN2_LABEL_MAX_LENGTH)
     172          53 :             return IDN2_TOO_BIG_LABEL;
     173       11611 :           if (srclen > *dstlen)
     174           0 :             return IDN2_TOO_BIG_DOMAIN;
     175             : 
     176       11611 :           memcpy (dst, src, srclen);
     177       11611 :           *dstlen = srclen;
     178       11611 :           return IDN2_OK;
     179             :         }
     180             :     }
     181             : 
     182     1702715 :   rc = _idn2_u8_to_u32_nfc (src, srclen, &p, &plen, flags & IDN2_NFC_INPUT);
     183     1702715 :   if (rc != IDN2_OK)
     184           1 :     goto out;
     185             : 
     186     1702714 :   if (!(flags & IDN2_TRANSITIONAL))
     187             :     {
     188     1553421 :       rc = _idn2_label_test (TEST_NFC |
     189             :                              TEST_2HYPHEN |
     190             :                              TEST_LEADING_COMBINING |
     191             :                              TEST_DISALLOWED |
     192             :                              TEST_CONTEXTJ_RULE |
     193             :                              TEST_CONTEXTO_WITH_RULE |
     194     1553421 :                              TEST_UNASSIGNED | TEST_BIDI |
     195             :                              ((flags & IDN2_NONTRANSITIONAL) ?
     196     1553421 :                               TEST_NONTRANSITIONAL : 0) | ((flags &
     197             :                                                             IDN2_USE_STD3_ASCII_RULES)
     198     1553421 :                                                            ? 0 :
     199             :                                                            TEST_ALLOW_STD3_DISALLOWED),
     200             :                              p, plen);
     201             : 
     202     1553421 :       if (rc != IDN2_OK)
     203     1009808 :         goto out;
     204             :     }
     205             : 
     206      692906 :   dst[0] = 'x';
     207      692906 :   dst[1] = 'n';
     208      692906 :   dst[2] = '-';
     209      692906 :   dst[3] = '-';
     210             : 
     211      692906 :   tmpl = *dstlen - 4;
     212      692906 :   rc = idn2_punycode_encode (p, plen, (char *) dst + 4, &tmpl);
     213      692906 :   if (rc != IDN2_OK)
     214         167 :     goto out;
     215             : 
     216             : 
     217      692739 :   *dstlen = 4 + tmpl;
     218             : 
     219      692739 :   if (check_roundtrip)
     220             :     {
     221        1111 :       if (srclen_org != *dstlen
     222        1111 :           || c_strncasecmp ((char *) src_org, (char *) dst, srclen_org))
     223             :         {
     224           3 :           rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
     225           3 :           goto out;
     226             :         }
     227             :     }
     228      691628 :   else if (!(flags & IDN2_NO_ALABEL_ROUNDTRIP))
     229             :     {
     230      691628 :       rc = idn2_punycode_decode ((char *) dst + 4, *dstlen - 4,
     231             :                                  label_u32, &label32_len);
     232      691628 :       if (rc)
     233             :         {
     234         770 :           rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
     235         770 :           goto out;
     236             :         }
     237             : 
     238      690858 :       if (plen != label32_len || u32_cmp (p, label_u32, label32_len))
     239             :         {
     240           0 :           rc = IDN2_ALABEL_ROUNDTRIP_FAILED;
     241           0 :           goto out;
     242             :         }
     243             :     }
     244             : 
     245      691966 :   rc = IDN2_OK;
     246             : 
     247     1702715 : out:
     248     1702715 :   free (p);
     249     1702715 :   free (src_allocated);
     250     1702715 :   return rc;
     251             : }
     252             : 
     253             : #define TR46_TRANSITIONAL_CHECK \
     254             :   (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_TRANSITIONAL)
     255             : #define TR46_NONTRANSITIONAL_CHECK \
     256             :   (TEST_NFC | TEST_2HYPHEN | TEST_HYPHEN_STARTEND | TEST_LEADING_COMBINING | TEST_NONTRANSITIONAL)
     257             : 
     258             : static int
     259     4465183 : _tr46 (const uint8_t *domain_u8, uint8_t **out, int flags)
     260             : {
     261             :   size_t len, it;
     262             :   uint32_t *domain_u32;
     263     4465183 :   int err = IDN2_OK, rc;
     264     4465183 :   int transitional = 0;
     265             :   int test_flags;
     266             : 
     267     4465183 :   if (flags & IDN2_TRANSITIONAL)
     268     1119566 :     transitional = 1;
     269             : 
     270             :   /* convert UTF-8 to UTF-32 */
     271     4465183 :   if (!(domain_u32 =
     272     4465183 :         u8_to_u32 (domain_u8, u8_strlen (domain_u8) + 1, NULL, &len)))
     273             :     {
     274          20 :       if (errno == ENOMEM)
     275           0 :         return IDN2_MALLOC;
     276          20 :       return IDN2_ENCODING_ERROR;
     277             :     }
     278             : 
     279     4465163 :   size_t len2 = 0;
     280     5376652 :   for (it = 0; it < len - 1; it++)
     281             :     {
     282             :       IDNAMap map;
     283             : 
     284     4768483 :       get_idna_map (domain_u32[it], &map);
     285             : 
     286     4768483 :       if (map_is (&map, TR46_FLG_DISALLOWED))
     287             :         {
     288     3856994 :           if (domain_u32[it])
     289             :             {
     290     3856994 :               free (domain_u32);
     291     3856994 :               return IDN2_DISALLOWED;
     292             :             }
     293           0 :           len2++;
     294             :         }
     295      911489 :       else if (map_is (&map, TR46_FLG_MAPPED))
     296             :         {
     297       55667 :           len2 += map.nmappings;
     298             :         }
     299      855822 :       else if (map_is (&map, TR46_FLG_VALID))
     300             :         {
     301      814865 :           len2++;
     302             :         }
     303       40957 :       else if (map_is (&map, TR46_FLG_IGNORED))
     304             :         {
     305        1910 :           continue;
     306             :         }
     307       39047 :       else if (map_is (&map, TR46_FLG_DEVIATION))
     308             :         {
     309        6623 :           if (transitional)
     310             :             {
     311        2652 :               len2 += map.nmappings;
     312             :             }
     313             :           else
     314        3971 :             len2++;
     315             :         }
     316       32424 :       else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
     317             :         {
     318       22515 :           if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
     319             :             {
     320             :               /* valid because UseSTD3ASCIIRules=false, see #TR46 5 */
     321       20895 :               len2++;
     322             :             }
     323        1620 :           else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
     324             :             {
     325             :               /* mapped because UseSTD3ASCIIRules=false, see #TR46 5 */
     326        1620 :               len2 += map.nmappings;
     327             :             }
     328             :         }
     329             :     }
     330             : 
     331             :   /* Exit early if result is too long.
     332             :    * This avoids excessive CPU usage in punycode encoding, which is O(N^2). */
     333      608169 :   if (len2 >= IDN2_DOMAIN_MAX_LENGTH)
     334             :     {
     335          28 :       free (domain_u32);
     336          28 :       return IDN2_TOO_BIG_DOMAIN;
     337             :     }
     338             : 
     339      608141 :   uint32_t *tmp = (uint32_t *) malloc ((len2 + 1) * sizeof (uint32_t));
     340      608141 :   if (!tmp)
     341             :     {
     342           0 :       free (domain_u32);
     343           0 :       return IDN2_MALLOC;
     344             :     }
     345             : 
     346      608141 :   len2 = 0;
     347     1502016 :   for (it = 0; it < len - 1; it++)
     348             :     {
     349      893875 :       uint32_t c = domain_u32[it];
     350             :       IDNAMap map;
     351             : 
     352      893875 :       get_idna_map (c, &map);
     353             : 
     354      893875 :       if (map_is (&map, TR46_FLG_DISALLOWED))
     355             :         {
     356           0 :           tmp[len2++] = c;
     357             :         }
     358      893875 :       else if (map_is (&map, TR46_FLG_MAPPED))
     359             :         {
     360       53593 :           len2 += get_map_data (tmp + len2, &map);
     361             :         }
     362      840282 :       else if (map_is (&map, TR46_FLG_VALID))
     363             :         {
     364      804515 :           tmp[len2++] = c;
     365             :         }
     366       35767 :       else if (map_is (&map, TR46_FLG_IGNORED))
     367             :         {
     368        1676 :           continue;
     369             :         }
     370       34091 :       else if (map_is (&map, TR46_FLG_DEVIATION))
     371             :         {
     372        5705 :           if (transitional)
     373             :             {
     374        2194 :               len2 += get_map_data (tmp + len2, &map);
     375             :             }
     376             :           else
     377        3511 :             tmp[len2++] = c;
     378             :         }
     379       28386 :       else if (!(flags & IDN2_USE_STD3_ASCII_RULES))
     380             :         {
     381       18603 :           if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID))
     382             :             {
     383       17007 :               tmp[len2++] = c;
     384             :             }
     385        1596 :           else if (map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
     386             :             {
     387        1596 :               len2 += get_map_data (tmp + len2, &map);
     388             :             }
     389             :         }
     390             :     }
     391      608141 :   free (domain_u32);
     392             : 
     393             :   /* Normalize to NFC */
     394      608141 :   tmp[len2] = 0;
     395      608141 :   domain_u32 = u32_normalize (UNINORM_NFC, tmp, len2 + 1, NULL, &len);
     396      608141 :   free (tmp);
     397      608141 :   tmp = NULL;
     398             : 
     399      608141 :   if (!domain_u32)
     400             :     {
     401           0 :       if (errno == ENOMEM)
     402           0 :         return IDN2_MALLOC;
     403           0 :       return IDN2_ENCODING_ERROR;
     404             :     }
     405             : 
     406             :   /* split into labels and check */
     407             :   uint32_t *e, *s;
     408     1238102 :   for (e = s = domain_u32; *e; s = e)
     409             :     {
     410     1488135 :       while (*e && *e != '.')
     411      857275 :         e++;
     412             : 
     413      630860 :       if (e - s >= 4 && s[0] == 'x' && s[1] == 'n' && s[2] == '-'
     414       10284 :           && s[3] == '-')
     415        9094 :         {
     416             :           /* decode punycode and check result non-transitional */
     417             :           size_t ace_len;
     418             :           uint32_t name_u32[IDN2_LABEL_MAX_LENGTH];
     419        9993 :           size_t name_len = IDN2_LABEL_MAX_LENGTH;
     420             :           uint8_t *ace;
     421             : 
     422        9993 :           ace = u32_to_u8 (s + 4, e - s - 4, NULL, &ace_len);
     423        9993 :           if (!ace)
     424             :             {
     425           0 :               free (domain_u32);
     426           0 :               if (errno == ENOMEM)
     427         899 :                 return IDN2_MALLOC;
     428           0 :               return IDN2_ENCODING_ERROR;
     429             :             }
     430             : 
     431        9993 :           rc = idn2_punycode_decode ((char *) ace, ace_len,
     432             :                                      name_u32, &name_len);
     433             : 
     434        9993 :           free (ace);
     435             : 
     436        9993 :           if (rc)
     437             :             {
     438         899 :               free (domain_u32);
     439         899 :               return rc;
     440             :             }
     441             : 
     442        9094 :           test_flags = TR46_NONTRANSITIONAL_CHECK;
     443             : 
     444        9094 :           if (!(flags & IDN2_USE_STD3_ASCII_RULES))
     445        8733 :             test_flags |= TEST_ALLOW_STD3_DISALLOWED;
     446             : 
     447        9094 :           if ((rc = _idn2_label_test (test_flags, name_u32, name_len)))
     448        5515 :             err = rc;
     449             :         }
     450             :       else
     451             :         {
     452      620867 :           test_flags =
     453      620867 :             transitional ? TR46_TRANSITIONAL_CHECK :
     454             :             TR46_NONTRANSITIONAL_CHECK;
     455             : 
     456      620867 :           if (!(flags & IDN2_USE_STD3_ASCII_RULES))
     457      614950 :             test_flags |= TEST_ALLOW_STD3_DISALLOWED;
     458             : 
     459      620867 :           if ((rc = _idn2_label_test (test_flags, s, e - s)))
     460       12420 :             err = rc;
     461             :         }
     462             : 
     463      629961 :       if (*e)
     464       24942 :         e++;
     465             :     }
     466             : 
     467      607242 :   if (err == IDN2_OK && out)
     468      593115 :     {
     469      593115 :       uint8_t *_out = u32_to_u8 (domain_u32, len, NULL, &len);
     470      593115 :       free (domain_u32);
     471             : 
     472      593115 :       if (!_out)
     473             :         {
     474           0 :           if (errno == ENOMEM)
     475           0 :             return IDN2_MALLOC;
     476           0 :           return IDN2_ENCODING_ERROR;
     477             :         }
     478             : 
     479      593115 :       *out = _out;
     480             :     }
     481             :   else
     482       14127 :     free (domain_u32);
     483             : 
     484      607242 :   return err;
     485             : }
     486             : 
     487             : /**
     488             :  * idn2_lookup_u8:
     489             :  * @src: input zero-terminated UTF-8 string in Unicode NFC normalized form.
     490             :  * @lookupname: newly allocated output variable with name to lookup in DNS.
     491             :  * @flags: optional #idn2_flags to modify behaviour.
     492             :  *
     493             :  * Perform IDNA2008 lookup string conversion on domain name @src, as
     494             :  * described in section 5 of RFC 5891.  Note that the input string
     495             :  * must be encoded in UTF-8 and be in Unicode NFC form.
     496             :  *
     497             :  * Pass %IDN2_NFC_INPUT in @flags to convert input to NFC form before
     498             :  * further processing.  %IDN2_TRANSITIONAL and %IDN2_NONTRANSITIONAL
     499             :  * do already imply %IDN2_NFC_INPUT.
     500             :  *
     501             :  * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
     502             :  * convert any input A-labels to U-labels and perform additional
     503             :  * testing. This is default since version 2.2.
     504             :  * To switch this behavior off, pass IDN2_NO_ALABEL_ROUNDTRIP
     505             :  *
     506             :  * Pass %IDN2_TRANSITIONAL to enable Unicode TR46
     507             :  * transitional processing, and %IDN2_NONTRANSITIONAL to enable
     508             :  * Unicode TR46 non-transitional processing.
     509             :  *
     510             :  * Multiple flags may be specified by binary or:ing them together.
     511             :  *
     512             :  * After version 2.0.3: %IDN2_USE_STD3_ASCII_RULES disabled by default.
     513             :  * Previously we were eliminating non-STD3 characters from domain strings
     514             :  * such as _443._tcp.example.com, or IPs 1.2.3.4/24 provided to libidn2
     515             :  * functions. That was an unexpected regression for applications switching
     516             :  * from libidn and thus it is no longer applied by default.
     517             :  * Use %IDN2_USE_STD3_ASCII_RULES to enable that behavior again.
     518             :  *
     519             :  * After version 0.11: @lookupname may be NULL to test lookup of @src
     520             :  * without allocating memory.
     521             :  *
     522             :  * Returns: On successful conversion %IDN2_OK is returned, if the
     523             :  *   output domain or any label would have been too long
     524             :  *   %IDN2_TOO_BIG_DOMAIN or %IDN2_TOO_BIG_LABEL is returned, or
     525             :  *   another error code is returned.
     526             :  *
     527             :  * Since: 0.1
     528             :  **/
     529             : int
     530     5915510 : idn2_lookup_u8 (const uint8_t *src, uint8_t **lookupname, int flags)
     531             : {
     532     5915510 :   size_t lookupnamelen = 0;
     533             :   uint8_t _lookupname[IDN2_DOMAIN_MAX_LENGTH + 1];
     534     5915510 :   uint8_t *src_allocated = NULL;
     535             :   int rc;
     536             : 
     537     5915510 :   if (src == NULL)
     538             :     {
     539      337923 :       if (lookupname)
     540      337919 :         *lookupname = NULL;
     541      337923 :       return IDN2_OK;
     542             :     }
     543             : 
     544     5577587 :   rc = set_default_flags (&flags);
     545     5577587 :   if (rc != IDN2_OK)
     546           4 :     return rc;
     547             : 
     548     5577583 :   if (!(flags & IDN2_NO_TR46))
     549             :     {
     550     4465183 :       uint8_t *out = NULL;
     551             : 
     552     4465183 :       rc = _tr46 (src, &out, flags);
     553     4465183 :       if (rc != IDN2_OK)
     554     3872068 :         return rc;
     555             : 
     556      593115 :       src = src_allocated = out;
     557             :     }
     558             : 
     559             :   do
     560             :     {
     561     1714379 :       const uint8_t *end = (uint8_t *) strchrnul ((const char *) src, '.');
     562             :       /* XXX Do we care about non-U+002E dots such as U+3002, U+FF0E
     563             :          and U+FF61 here?  Perhaps when IDN2_NFC_INPUT? */
     564     1714379 :       size_t labellen = end - src;
     565             :       uint8_t tmp[IDN2_LABEL_MAX_LENGTH];
     566     1714379 :       size_t tmplen = IDN2_LABEL_MAX_LENGTH;
     567             : 
     568     1714379 :       rc = label (src, labellen, tmp, &tmplen, flags);
     569     1714379 :       if (rc != IDN2_OK)
     570             :         {
     571     1010802 :           free (src_allocated);
     572     1010859 :           return rc;
     573             :         }
     574             : 
     575      703577 :       if (lookupnamelen + tmplen
     576      703577 :           > IDN2_DOMAIN_MAX_LENGTH - (tmplen == 0 && *end == '\0' ? 1 : 2))
     577             :         {
     578          57 :           free (src_allocated);
     579          57 :           return IDN2_TOO_BIG_DOMAIN;
     580             :         }
     581             : 
     582      703520 :       memcpy (_lookupname + lookupnamelen, tmp, tmplen);
     583      703520 :       lookupnamelen += tmplen;
     584             : 
     585      703520 :       if (*end == '.')
     586             :         {
     587        8864 :           if (lookupnamelen + 1 > IDN2_DOMAIN_MAX_LENGTH)
     588             :             {
     589           0 :               free (src_allocated);
     590           0 :               return IDN2_TOO_BIG_DOMAIN;
     591             :             }
     592             : 
     593        8864 :           _lookupname[lookupnamelen] = '.';
     594        8864 :           lookupnamelen++;
     595             :         }
     596      703520 :       _lookupname[lookupnamelen] = '\0';
     597             : 
     598      703520 :       src = end;
     599             :     }
     600      703520 :   while (*src++);
     601             : 
     602      694656 :   free (src_allocated);
     603             : 
     604      694656 :   if (lookupname)
     605             :     {
     606      694655 :       uint8_t *tmp = (uint8_t *) malloc (lookupnamelen + 1);
     607             : 
     608      694655 :       if (tmp == NULL)
     609           0 :         return IDN2_MALLOC;
     610             : 
     611      694655 :       memcpy (tmp, _lookupname, lookupnamelen + 1);
     612      694655 :       *lookupname = tmp;
     613             :     }
     614             : 
     615      694656 :   return IDN2_OK;
     616             : }
     617             : 
     618             : /**
     619             :  * idn2_lookup_ul:
     620             :  * @src: input zero-terminated locale encoded string.
     621             :  * @lookupname: newly allocated output variable with name to lookup in DNS.
     622             :  * @flags: optional #idn2_flags to modify behaviour.
     623             :  *
     624             :  * Perform IDNA2008 lookup string conversion on domain name @src, as
     625             :  * described in section 5 of RFC 5891.  Note that the input is assumed
     626             :  * to be encoded in the locale's default coding system, and will be
     627             :  * transcoded to UTF-8 and NFC normalized by this function.
     628             :  *
     629             :  * Pass %IDN2_ALABEL_ROUNDTRIP in @flags to
     630             :  * convert any input A-labels to U-labels and perform additional
     631             :  * testing. This is default since version 2.2.
     632             :  * To switch this behavior off, pass %IDN2_NO_ALABEL_ROUNDTRIP.
     633             :  *
     634             :  * Pass %IDN2_TRANSITIONAL to enable Unicode TR46 transitional processing,
     635             :  * and %IDN2_NONTRANSITIONAL to enable Unicode TR46 non-transitional
     636             :  * processing.
     637             :  *
     638             :  * Multiple flags may be specified by binary or:ing them together, for
     639             :  * example %IDN2_ALABEL_ROUNDTRIP | %IDN2_NONTRANSITIONAL.
     640             :  *
     641             :  * The %IDN2_NFC_INPUT in @flags is always enabled in this function.
     642             :  *
     643             :  * After version 0.11: @lookupname may be NULL to test lookup of @src
     644             :  * without allocating memory.
     645             :  *
     646             :  * Returns: On successful conversion %IDN2_OK is returned; if
     647             :  *   conversion from locale to UTF-8 fails then %IDN2_ICONV_FAIL (or
     648             :  *   %IDN2_MALLOC when out of memory) is returned; if the output domain
     649             :  *   or any label would have been too long %IDN2_TOO_BIG_DOMAIN or
     650             :  *   %IDN2_TOO_BIG_LABEL is returned; or another error code is returned.
     651             :  *
     652             :  * Since: 0.1
     653             :  **/
     654             : int
     655        3725 : idn2_lookup_ul (const char *src, char **lookupname, int flags)
     656             : {
     657        3725 :   uint8_t *utf8src = NULL; /* stays NULL when @src is NULL */
     658             :   int rc;
     659             : 
     660        3725 :   if (src)
     661             :     {
     662        3721 :       const char *encoding = locale_charset ();
     663             : 
     664        3721 :       utf8src = u8_strconv_from_encoding (src, encoding, iconveh_error);
     665             : 
     666        3721 :       if (!utf8src)
     667             :         {
     668        3252 :           if (errno == ENOMEM) /* tell out-of-memory apart from a conversion failure */
     669           0 :             return IDN2_MALLOC;
     670        3252 :           return IDN2_ICONV_FAIL;
     671             :         }
     672             :     }
     673             : 
     674         473 :   rc = idn2_lookup_u8 (utf8src, (uint8_t **) lookupname,
     675             :                        flags | IDN2_NFC_INPUT); /* NFC input handling is always forced on */
     676             : 
     677         473 :   free (utf8src);
     678             : 
     679         473 :   return rc;
     680             : }
     681             : 
     682             : /**
     683             :  * idn2_to_ascii_4i:
     684             :  * @input: zero terminated input Unicode (UCS-4) string.
     685             :  * @inlen: number of elements in @input.
     686             :  * @output: output zero terminated string that must have room for at
     687             :  *       least 63 characters plus the terminating zero.
     688             :  * @flags: optional #idn2_flags to modify behaviour.
     689             :  *
     690             :  * The ToASCII operation takes a sequence of Unicode code points that make
     691             :  * up one domain label and transforms it into a sequence of code points in
     692             :  * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
     693             :  * the resulting sequence are equivalent labels.
     694             :  *
     695             :  * It is important to note that the ToASCII operation can fail.
     696             :  * ToASCII fails if any step of it fails. If any step of the
     697             :  * ToASCII operation fails on any label in a domain name, that domain
     698             :  * name MUST NOT be used as an internationalized domain name.
     699             :  * The method for dealing with this failure is application-specific.
     700             :  *
     701             :  * The inputs to ToASCII are a sequence of code points.
     702             :  *
     703             :  * ToASCII never alters a sequence of code points that are all in the ASCII
     704             :  * range to begin with (although it could fail). Applying the ToASCII operation multiple
     705             :  * times has exactly the same effect as applying it just once.
     706             :  *
     707             :  * The default behavior of this function (when flags are zero) is to apply
     708             :  * the IDNA2008 rules without the TR46 amendments. As the TR46
     709             :  * non-transitional processing is nowadays ubiquitous, when unsure, it is
     710             :  * recommended to call this function with the %IDN2_NONTRANSITIONAL
     711             :  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
     712             :  *
     713             :  * Warning: With version 2.1.1 until before version 2.3.5 this
     714             :  * function was deprecated in favor of idn2_to_ascii_4i2().  We still
     715             :  * encourage you to use idn2_to_ascii_4i2() when appropriate.
     716             :  *
     717             :  * Returns: On successful conversion %IDN2_OK is returned; if the
     718             :  *   output label would have been too long %IDN2_TOO_BIG_LABEL is
     719             :  *   returned, or another error code is returned.
     720             :  *
     721             :  * Since: 2.0.0
     722             :  **/
     723             : int
     724         172 : idn2_to_ascii_4i (const uint32_t *input, size_t inlen, char *output,
     725             :                   int flags)
     726             : {
     727             :   char *out;
     728             :   int rc;
     729             : 
     730         172 :   if (!input) /* NULL input: succeed with an empty output string */
     731             :     {
     732           0 :       if (output)
     733           0 :         *output = 0;
     734           0 :       return IDN2_OK;
     735             :     }
     736             : 
     737         172 :   rc = idn2_to_ascii_4i2 (input, inlen, &out, flags);
     738         172 :   if (rc == IDN2_OK)
     739             :     {
     740           1 :       size_t len = strlen (out);
     741             : 
     742           1 :       if (len > IDN2_LABEL_MAX_LENGTH) /* bounds the strcpy() below */
     743           0 :         rc = IDN2_TOO_BIG_LABEL;
     744           1 :       else if (output)
     745           1 :         strcpy (output, out);
     746             : 
     747           1 :       free (out); /* the temporary result is released on every success path */
     748             :     }
     749             : 
     750         172 :   return rc;
     751             : }
     752             : 
     753             : /**
     754             :  * idn2_to_ascii_4i2:
     755             :  * @input: zero terminated input Unicode (UCS-4) string.
     756             :  * @inlen: number of elements in @input.
     757             :  * @output: pointer to newly allocated zero-terminated output string.
     758             :  * @flags: optional #idn2_flags to modify behaviour.
     759             :  *
     760             :  * The ToASCII operation takes a sequence of Unicode code points that make
     761             :  * up one domain label and transforms it into a sequence of code points in
     762             :  * the ASCII range (0..7F). If ToASCII succeeds, the original sequence and
     763             :  * the resulting sequence are equivalent labels.
     764             :  *
     765             :  * It is important to note that the ToASCII operation can fail.
     766             :  * ToASCII fails if any step of it fails. If any step of the
     767             :  * ToASCII operation fails on any label in a domain name, that domain
     768             :  * name MUST NOT be used as an internationalized domain name.
     769             :  * The method for dealing with this failure is application-specific.
     770             :  *
     771             :  * The inputs to ToASCII are a sequence of code points.
     772             :  *
     773             :  * ToASCII never alters a sequence of code points that are all in the ASCII
     774             :  * range to begin with (although it could fail). Applying the ToASCII operation multiple
     775             :  * times has exactly the same effect as applying it just once.
     776             :  *
     777             :  * The default behavior of this function (when flags are zero) is to apply
     778             :  * the IDNA2008 rules without the TR46 amendments. As the TR46
     779             :  * non-transitional processing is nowadays ubiquitous, when unsure, it is
     780             :  * recommended to call this function with the %IDN2_NONTRANSITIONAL
     781             :  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
     782             :  *
     783             :  * Returns: On successful conversion %IDN2_OK is returned; if the
     784             :  *   output label would have been too long %IDN2_TOO_BIG_LABEL is
     785             :  *   returned, or another error code is returned.
     786             :  *
     787             :  * Since: 2.1.1
     788             :  **/
     789             : int
     790        1208 : idn2_to_ascii_4i2 (const uint32_t *input, size_t inlen, char **output,
     791             :                    int flags)
     792             : {
     793             :   uint32_t *input_u32;
     794             :   uint8_t *input_u8, *output_u8;
     795             :   size_t length;
     796             :   int rc;
     797             : 
     798        1208 :   if (!input) /* NULL input: succeed with a NULL result */
     799             :     {
     800           2 :       if (output)
     801           2 :         *output = NULL;
     802           2 :       return IDN2_OK;
     803             :     }
     804             : 
     805        1206 :   input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t)); /* one extra element for the terminating zero */
     806        1206 :   if (!input_u32)
     807           0 :     return IDN2_MALLOC;
     808             : 
     809        1206 :   u32_cpy (input_u32, input, inlen);
     810        1206 :   input_u32[inlen] = 0;
     811             : 
     812        1206 :   input_u8 = u32_to_u8 (input_u32, inlen + 1, NULL, &length); /* inlen + 1 converts the terminating zero too */
     813        1206 :   free (input_u32);
     814        1206 :   if (!input_u8)
     815             :     {
     816        1148 :       if (errno == ENOMEM) /* tell out-of-memory apart from an encoding failure */
     817           0 :         return IDN2_MALLOC;
     818        1148 :       return IDN2_ENCODING_ERROR;
     819             :     }
     820             : 
     821          58 :   rc = idn2_lookup_u8 (input_u8, &output_u8, flags);
     822          58 :   free (input_u8);
     823             : 
     824          58 :   if (rc == IDN2_OK)
     825             :     {
     826          16 :       if (output)
     827          16 :         *output = (char *) output_u8;
     828             :       else
     829           0 :         free (output_u8); /* no @output given: discard the result */
     830             :     }
     831             : 
     832          58 :   return rc;
     833             : }
     834             : 
     835             : /**
     836             :  * idn2_to_ascii_4z:
     837             :  * @input: zero terminated input Unicode (UCS-4) string.
     838             :  * @output: pointer to newly allocated zero-terminated output string.
     839             :  * @flags: optional #idn2_flags to modify behaviour.
     840             :  *
     841             :  * Convert UCS-4 domain name to ASCII string using the IDNA2008
     842             :  * rules.  The domain name may contain several labels, separated by dots.
     843             :  * The output buffer must be deallocated by the caller.
     844             :  *
     845             :  * The default behavior of this function (when flags are zero) is to apply
     846             :  * the IDNA2008 rules without the TR46 amendments. As the TR46
     847             :  * non-transitional processing is nowadays ubiquitous, when unsure, it is
     848             :  * recommended to call this function with the %IDN2_NONTRANSITIONAL
     849             :  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
     850             :  *
     851             :  * Return value: Returns %IDN2_OK on success (also for a NULL @input), or error code.
     852             :  *
     853             :  * Since: 2.0.0
     854             :  **/
     855             : int
     856        1034 : idn2_to_ascii_4z (const uint32_t *input, char **output, int flags)
     857             : {
     858             :   uint8_t *input_u8;
     859             :   size_t length;
     860             :   int rc;
     861             : 
     862        1034 :   if (!input) /* NULL input: succeed with a NULL result */
     863             :     {
     864           0 :       if (output)
     865           0 :         *output = NULL;
     866           0 :       return IDN2_OK;
     867             :     }
     868             : 
     869        1034 :   input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length); /* + 1 converts the terminating zero too */
     870        1034 :   if (!input_u8)
     871             :     {
     872         972 :       if (errno == ENOMEM) /* tell out-of-memory apart from an encoding failure */
     873           0 :         return IDN2_MALLOC;
     874         972 :       return IDN2_ENCODING_ERROR;
     875             :     }
     876             : 
     877          62 :   rc = idn2_lookup_u8 (input_u8, (uint8_t **) output, flags);
     878          62 :   free (input_u8);
     879             : 
     880          62 :   return rc;
     881             : }
     882             : 
     883             : /**
     884             :  * idn2_to_ascii_8z:
     885             :  * @input: zero terminated input UTF-8 string.
     886             :  * @output: pointer to newly allocated output string.
     887             :  * @flags: optional #idn2_flags to modify behaviour.
     888             :  *
     889             :  * Convert UTF-8 domain name to ASCII string using the IDNA2008
     890             :  * rules.  The domain name may contain several labels, separated by dots.
     891             :  * The output buffer must be deallocated by the caller.
     892             :  *
     893             :  * The default behavior of this function (when flags are zero) is to apply
     894             :  * the IDNA2008 rules without the TR46 amendments. As the TR46
     895             :  * non-transitional processing is nowadays ubiquitous, when unsure, it is
     896             :  * recommended to call this function with the %IDN2_NONTRANSITIONAL
     897             :  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
     898             :  *
     899             :  * Return value: Returns %IDN2_OK on success, or error code, exactly as idn2_lookup_u8().
     900             :  *
     901             :  * Since: 2.0.0
     902             :  **/
     903             : int
     904        3710 : idn2_to_ascii_8z (const char *input, char **output, int flags)
     905             : {
     906        3710 :   return idn2_lookup_u8 ((const uint8_t *) input, (uint8_t **) output, flags);
     907             : }
     908             : 
     909             : /**
     910             :  * idn2_to_ascii_lz:
     911             :  * @input: zero terminated input locale encoded string.
     912             :  * @output: pointer to newly allocated output string.
     913             :  * @flags: optional #idn2_flags to modify behaviour.
     914             :  *
     915             :  * Convert a domain name in locale's encoding to an ASCII string using the IDNA2008
     916             :  * rules.  The domain name may contain several labels, separated by dots.
     917             :  * The output buffer must be deallocated by the caller.
     918             :  *
     919             :  * The default behavior of this function (when flags are zero) is to apply
     920             :  * the IDNA2008 rules without the TR46 amendments. As the TR46
     921             :  * non-transitional processing is nowadays ubiquitous, when unsure, it is
     922             :  * recommended to call this function with the %IDN2_NONTRANSITIONAL
     923             :  * and the %IDN2_NFC_INPUT flags for compatibility with other software.
     924             :  *
     925             :  * Returns: %IDN2_OK on success, or error code.
     926             :  * Same as described in idn2_lookup_ul() documentation.
     927             :  *
     928             :  * Since: 2.0.0
     929             :  **/
     930             : int
     931        3712 : idn2_to_ascii_lz (const char *input, char **output, int flags)
     932             : {
     933        3712 :   return idn2_lookup_ul (input, output, flags);
     934             : }

Generated by: LCOV version 1.16