LCOV - code coverage report
Current view: top level - builds/libidn/libidn2/lib - decode.c (source / functions) Hit Total Coverage
Test: Libidn2-2.3.7.2-64ab Code Coverage Lines: 119 128 93.0 %
Date: 2024-04-01 07:07:37 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* decode.c - implementation of IDNA2008 decoding functions
       2             :    Copyright (C) 2011-2024 Simon Josefsson
       3             : 
       4             :    Libidn2 is free software: you can redistribute it and/or modify it
       5             :    under the terms of either:
       6             : 
       7             :      * the GNU Lesser General Public License as published by the Free
       8             :        Software Foundation; either version 3 of the License, or (at
       9             :        your option) any later version.
      10             : 
      11             :    or
      12             : 
      13             :      * the GNU General Public License as published by the Free
      14             :        Software Foundation; either version 2 of the License, or (at
      15             :        your option) any later version.
      16             : 
      17             :    or both in parallel, as here.
      18             : 
      19             :    This program is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      22             :    GNU General Public License for more details.
      23             : 
      24             :    You should have received copies of the GNU General Public License and
      25             :    the GNU Lesser General Public License along with this program.  If
      26             :    not, see <http://www.gnu.org/licenses/>.
      27             : */
      28             : 
      29             : #include <config.h>
      30             : 
      31             : #include "idn2.h"
      32             : 
      33             : #include <errno.h>                /* errno */
      34             : #include <stdlib.h>               /* malloc, free */
      35             : 
      36             : #include <unitypes.h>
      37             : #include <uniconv.h>              /* u8_strconv_from_locale */
      38             : #include <unistr.h>               /* u8_to_u32, u32_cpy, ... */
      39             : 
      40             : /**
      41             :  * idn2_to_unicode_8z4z:
      42             :  * @input: Input zero-terminated UTF-8 string.
      43             :  * @output: Newly allocated UTF-32/UCS-4 output string.
      44             :  * @flags: Currently unused.
      45             :  *
      46             :  * Converts a possibly ACE encoded domain name in UTF-8 format into a
      47             :  * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
      48             :  * and must be deallocated by the caller.
      49             :  *
      50             :  * @output may be NULL to test lookup of @input without allocating memory.
      51             :  *
      52             :  * Returns:
      53             :  *   %IDN2_OK: The conversion was successful.
      54             :  *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
      55             :  *   %IDN2_TOO_BIG_LABEL: A label would have been too long.
      56             :  *   %IDN2_ENCODING_ERROR: Character conversion failed.
      57             :  *   %IDN2_MALLOC: Memory allocation failed.
      58             :  *
      59             :  * Since: 2.0.0
      60             :  **/
      61             : int
      62         290 : idn2_to_unicode_8z4z (const char *input, uint32_t **output,
      63             :                       G_GNUC_UNUSED int flags)
      64             : {
      65             :   uint32_t *domain_u32;
      66             :   int rc;
      67             : 
      68         290 :   if (!input)
      69             :     {
      70           6 :       if (output)
      71           5 :         *output = NULL;
      72           6 :       return IDN2_OK;
      73             :     }
      74             : 
      75             :   /* split into labels and check */
      76             :   uint32_t out_u32[IDN2_DOMAIN_MAX_LENGTH + 1];
      77         284 :   size_t out_len = 0;
      78             :   const char *e, *s;
      79             : 
      80        1171 :   for (e = s = input; *e; s = e)
      81             :     {
      82             :       uint32_t label_u32[IDN2_LABEL_MAX_LENGTH];
      83         940 :       size_t label_len = IDN2_LABEL_MAX_LENGTH;
      84             : 
      85       10863 :       while (*e && *e != '.')
      86        9923 :         e++;
      87             : 
      88         940 :       if (e - s >= 4 && (s[0] == 'x' || s[0] == 'X')
      89         586 :           && (s[1] == 'n' || s[1] == 'N') && s[2] == '-' && s[3] == '-')
      90             :         {
      91         458 :           s += 4;
      92             : 
      93         458 :           rc = idn2_punycode_decode ((char *) s, e - s,
      94             :                                      label_u32, &label_len);
      95         458 :           if (rc)
      96          53 :             return rc;
      97             : 
      98         411 :           if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
      99           1 :             return IDN2_TOO_BIG_DOMAIN;
     100             : 
     101         410 :           u32_cpy (out_u32 + out_len, label_u32, label_len);
     102             :         }
     103             :       else
     104             :         {
     105             :           /* convert UTF-8 input to UTF-32 */
     106         482 :           if (!
     107             :               (domain_u32 =
     108         482 :                u8_to_u32 ((uint8_t *) s, e - s, NULL, &label_len)))
     109             :             {
     110           3 :               if (errno == ENOMEM)
     111           0 :                 return IDN2_MALLOC;
     112           3 :               return IDN2_ENCODING_ERROR;
     113             :             }
     114             : 
     115         479 :           if (label_len > IDN2_LABEL_MAX_LENGTH)
     116             :             {
     117           1 :               free (domain_u32);
     118           1 :               return IDN2_TOO_BIG_LABEL;
     119             :             }
     120             : 
     121         478 :           if (out_len + label_len + (*e == '.') > IDN2_DOMAIN_MAX_LENGTH)
     122             :             {
     123           1 :               free (domain_u32);
     124           1 :               return IDN2_TOO_BIG_DOMAIN;
     125             :             }
     126             : 
     127         477 :           u32_cpy (out_u32 + out_len, domain_u32, label_len);
     128         477 :           free (domain_u32);
     129             :         }
     130             : 
     131         887 :       out_len += label_len;
     132         887 :       if (*e)
     133             :         {
     134         661 :           out_u32[out_len++] = '.';
     135         661 :           e++;
     136             :         }
     137             :     }
     138             : 
     139         231 :   if (output)
     140             :     {
     141             :       uint32_t *_out;
     142             : 
     143         229 :       out_u32[out_len] = 0;
     144             : 
     145         229 :       _out = u32_cpy_alloc (out_u32, out_len + 1);
     146         229 :       if (!_out)
     147             :         {
     148           0 :           if (errno == ENOMEM)
     149           0 :             return IDN2_MALLOC;
     150           0 :           return IDN2_ENCODING_ERROR;
     151             :         }
     152             : 
     153         229 :       *output = _out;
     154             :     }
     155             : 
     156         231 :   return IDN2_OK;
     157             : }
     158             : 
     159             : /**
     160             :  * idn2_to_unicode_4z4z:
     161             :  * @input: Input zero-terminated UTF-32 string.
     162             :  * @output: Newly allocated UTF-32 output string.
     163             :  * @flags: Currently unused.
     164             :  *
     165             :  * Converts a possibly ACE encoded domain name in UTF-32 format into a
     166             :  * UTF-32 string (punycode decoding). The output buffer will be zero-terminated
     167             :  * and must be deallocated by the caller.
     168             :  *
     169             :  * @output may be NULL to test lookup of @input without allocating memory.
     170             :  *
     171             :  * Returns:
     172             :  *   %IDN2_OK: The conversion was successful.
     173             :  *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
     174             :  *   %IDN2_TOO_BIG_LABEL: A label would have been too long.
     175             :  *   %IDN2_ENCODING_ERROR: Character conversion failed.
     176             :  *   %IDN2_MALLOC: Memory allocation failed.
     177             :  *
     178             :  * Since: 2.0.0
     179             :  **/
     180             : int
     181          91 : idn2_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags)
     182             : {
     183             :   uint8_t *input_u8;
     184             :   uint32_t *output_u32;
     185             :   size_t length;
     186             :   int rc;
     187             : 
     188          91 :   if (!input)
     189             :     {
     190           2 :       if (output)
     191           1 :         *output = NULL;
     192           2 :       return IDN2_OK;
     193             :     }
     194             : 
     195          89 :   input_u8 = u32_to_u8 (input, u32_strlen (input) + 1, NULL, &length);
     196          89 :   if (!input_u8)
     197             :     {
     198           9 :       if (errno == ENOMEM)
     199           0 :         return IDN2_MALLOC;
     200           9 :       return IDN2_ENCODING_ERROR;
     201             :     }
     202             : 
     203          80 :   rc = idn2_to_unicode_8z4z ((char *) input_u8, &output_u32, flags);
     204          80 :   free (input_u8);
     205             : 
     206          80 :   if (rc == IDN2_OK)
     207             :     {
     208          69 :       if (output)
     209          67 :         *output = output_u32;
     210             :       else
     211           2 :         free (output_u32);
     212             :     }
     213             : 
     214          80 :   return rc;
     215             : }
     216             : 
     217             : /**
     218             :  * idn2_to_unicode_44i:
     219             :  * @in: Input array with UTF-32 code points.
     220             :  * @inlen: Number of code points in the input array.
     221             :  * @out: Output array with UTF-32 code points.
     222             :  * @outlen: On input, capacity of @out in code points; on exit, length of
     223             :  *          the decoded label in code points (may exceed the count copied).
     224             :  * @flags: Currently unused.
     225             :  *
     226             :  * The ToUnicode operation takes a sequence of UTF-32 code points
     227             :  * that make up one domain label and returns a sequence of UTF-32
     228             :  * code points. If the input sequence is a label in ACE form, then the
     229             :  * result is an equivalent internationalized label that is not in ACE
     230             :  * form, otherwise the original sequence is returned unaltered.
     231             :  *
     232             :  * @out may be NULL to test lookup of @in without storing the result.
     233             :  *
     234             :  * Returns:
     235             :  *   %IDN2_OK: The conversion was successful.
     236             :  *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
     237             :  *   %IDN2_TOO_BIG_LABEL: A label would have been too long.
     238             :  *   %IDN2_ENCODING_ERROR: Character conversion failed.
     239             :  *   %IDN2_MALLOC: Memory allocation failed.
     240             :  *
     241             :  * Since: 2.0.0
     242             :  **/
     243             : int
     244          58 : idn2_to_unicode_44i (const uint32_t *in, size_t inlen, uint32_t *out,
     245             :                      size_t *outlen, int flags)
     246             : {
     247             :   uint32_t *input_u32;
     248             :   uint32_t *output_u32;
     249             :   size_t len;
     250             :   int rc;
     251             : 
     252          58 :   if (!in)
     253             :     {
     254           5 :       if (outlen)
     255           3 :         *outlen = 0;
     256           5 :       return IDN2_OK;
     257             :     }
     258             : 
     259          53 :   input_u32 = (uint32_t *) malloc ((inlen + 1) * sizeof (uint32_t));
     260          53 :   if (!input_u32)
     261           0 :     return IDN2_MALLOC;
     262             : 
     263          53 :   u32_cpy (input_u32, in, inlen);
     264          53 :   input_u32[inlen] = 0;
     265             : 
     266          53 :   rc = idn2_to_unicode_4z4z (input_u32, &output_u32, flags);
     267          53 :   free (input_u32);
     268          53 :   if (rc != IDN2_OK)
     269          15 :     return rc;
     270             : 
     271          38 :   len = u32_strlen (output_u32);
     272          38 :   if (out && outlen)
     273          34 :     u32_cpy (out, output_u32, len < *outlen ? len : *outlen);
     274          38 :   free (output_u32);
     275             : 
     276          38 :   if (outlen)
     277          35 :     *outlen = len;
     278             : 
     279          38 :   return IDN2_OK;
     280             : }
     281             : 
     282             : /**
     283             :  * idn2_to_unicode_8z8z:
     284             :  * @input: Input zero-terminated UTF-8 string.
     285             :  * @output: Newly allocated UTF-8 output string.
     286             :  * @flags: Currently unused.
     287             :  *
     288             :  * Converts a possibly ACE encoded domain name in UTF-8 format into a
     289             :  * UTF-8 string (punycode decoding). The output buffer will be zero-terminated
     290             :  * and must be deallocated by the caller.
     291             :  *
     292             :  * @output may be NULL to test lookup of @input without allocating memory.
     293             :  *
     294             :  * Returns:
     295             :  *   %IDN2_OK: The conversion was successful.
     296             :  *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
     297             :  *   %IDN2_TOO_BIG_LABEL: A label would have been too long.
     298             :  *   %IDN2_ENCODING_ERROR: Character conversion failed.
     299             :  *   %IDN2_MALLOC: Memory allocation failed.
     300             :  *
     301             :  * Since: 2.0.0
     302             :  **/
     303             : int
     304         171 : idn2_to_unicode_8z8z (const char *input, char **output, int flags)
     305             : {
     306             :   uint32_t *output_u32;
     307             :   uint8_t *output_u8;
     308             :   size_t length;
     309             :   int rc;
     310             : 
     311         171 :   rc = idn2_to_unicode_8z4z (input, &output_u32, flags);
     312         171 :   if (rc != IDN2_OK || !input)
     313          40 :     return rc;
     314             : 
     315             :   output_u8 =
     316         131 :     u32_to_u8 (output_u32, u32_strlen (output_u32) + 1, NULL, &length);
     317         131 :   free (output_u32);
     318             : 
     319         131 :   if (!output_u8)
     320             :     {
     321           1 :       if (errno == ENOMEM)
     322           0 :         return IDN2_MALLOC;
     323           1 :       return IDN2_ENCODING_ERROR;
     324             :     }
     325             : 
     326         130 :   if (output)
     327         128 :     *output = (char *) output_u8;
     328             :   else
     329           2 :     free (output_u8);
     330             : 
     331         130 :   return IDN2_OK;
     332             : }
     333             : 
     334             : /**
     335             :  * idn2_to_unicode_8zlz:
     336             :  * @input: Input zero-terminated UTF-8 string.
     337             :  * @output: Newly allocated output string in current locale's character set.
     338             :  * @flags: Currently unused.
     339             :  *
     340             :  * Converts a possibly ACE encoded domain name in UTF-8 format into a
     341             :  * string encoded in the current locale's character set (punycode
     342             :  * decoding). The output buffer will be zero-terminated and must be
     343             :  * deallocated by the caller.
     344             :  *
     345             :  * @output may be NULL to test lookup of @input without allocating memory.
     346             :  *
     347             :  * Returns:
     348             :  *   %IDN2_OK: The conversion was successful.
     349             :  *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
     350             :  *   %IDN2_TOO_BIG_LABEL: A label would have been too long.
     351             :  *   %IDN2_ENCODING_ERROR: Character conversion failed.
     352             :  *   %IDN2_MALLOC: Memory allocation failed.
     353             :  *
     354             :  * Since: 2.0.0
     355             :  **/
     356             : int
     357         132 : idn2_to_unicode_8zlz (const char *input, char **output, int flags)
     358             : {
     359             :   int rc;
     360             :   uint8_t *output_u8, *output_l8;
     361             :   const char *encoding;
     362             : 
     363         132 :   rc = idn2_to_unicode_8z8z (input, (char **) &output_u8, flags);
     364         132 :   if (rc != IDN2_OK || !input)
     365          33 :     return rc;
     366             : 
     367          99 :   encoding = locale_charset ();
     368             :   output_l8 =
     369          99 :     (uint8_t *) u8_strconv_to_encoding (output_u8, encoding, iconveh_error);
     370             : 
     371          99 :   if (!output_l8)
     372             :     {
     373          10 :       if (errno == ENOMEM)
     374           0 :         rc = IDN2_MALLOC;
     375             :       else
     376          10 :         rc = IDN2_ENCODING_ERROR;
     377             : 
     378          10 :       free (output_l8);
     379             :     }
     380             :   else
     381             :     {
     382          89 :       if (output)
     383          85 :         *output = (char *) output_l8;
     384             :       else
     385           4 :         free (output_l8);
     386             : 
     387          89 :       rc = IDN2_OK;
     388             :     }
     389             : 
     390          99 :   free (output_u8);
     391             : 
     392          99 :   return rc;
     393             : }
     394             : 
     395             : /**
     396             :  * idn2_to_unicode_lzlz:
     397             :  * @input: Input zero-terminated string encoded in the current locale's character set.
     398             :  * @output: Newly allocated output string in current locale's character set.
     399             :  * @flags: Currently unused.
     400             :  *
     401             :  * Converts a possibly ACE encoded domain name in the locale's character
     402             :  * set into a string encoded in the current locale's character set (punycode
     403             :  * decoding). The output buffer will be zero-terminated and must be
     404             :  * deallocated by the caller.
     405             :  *
     406             :  * @output may be NULL to test lookup of @input without allocating memory.
     407             :  *
     408             :  * Returns:
     409             :  *   %IDN2_OK: The conversion was successful.
     410             :  *   %IDN2_TOO_BIG_DOMAIN: The domain is too long.
     411             :  *   %IDN2_TOO_BIG_LABEL: A label would have been too long.
     412             :  *   %IDN2_ENCODING_ERROR: Output character conversion failed.
     413             :  *   %IDN2_ICONV_FAIL: Input character conversion failed.
     414             :  *   %IDN2_MALLOC: Memory allocation failed.
     415             :  *
     416             :  * Since: 2.0.0
     417             :  **/
     418             : int
     419          98 : idn2_to_unicode_lzlz (const char *input, char **output, int flags)
     420             : {
     421             :   uint8_t *input_l8;
     422             :   const char *encoding;
     423             :   int rc;
     424             : 
     425          98 :   if (!input)
     426             :     {
     427           2 :       if (output)
     428           1 :         *output = NULL;
     429           2 :       return IDN2_OK;
     430             :     }
     431             : 
     432          96 :   encoding = locale_charset ();
     433          96 :   input_l8 = u8_strconv_from_encoding (input, encoding, iconveh_error);
     434             : 
     435          96 :   if (!input_l8)
     436             :     {
     437           3 :       if (errno == ENOMEM)
     438           0 :         return IDN2_MALLOC;
     439           3 :       return IDN2_ICONV_FAIL;
     440             :     }
     441             : 
     442          93 :   rc = idn2_to_unicode_8zlz ((char *) input_l8, output, flags);
     443          93 :   free (input_l8);
     444             : 
     445          93 :   return rc;
     446             : }

Generated by: LCOV version 1.16