LCOV - code coverage report
Current view: top level - builds/libidn/libidn2/lib - idna.c (source / functions) Hit Total Coverage
Test: Libidn2-2.3.8.3-7d33 Code Coverage Lines: 99 117 84.6 %
Date: 2025-03-14 23:15:33 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* idna.c - implementation of high-level IDNA processing function
       2             :    Copyright (C) 2011-2025 Simon Josefsson
       3             : 
       4             :    Libidn2 is free software: you can redistribute it and/or modify it
       5             :    under the terms of either:
       6             : 
       7             :      * the GNU Lesser General Public License as published by the Free
       8             :        Software Foundation; either version 3 of the License, or (at
       9             :        your option) any later version.
      10             : 
      11             :    or
      12             : 
      13             :      * the GNU General Public License as published by the Free
      14             :        Software Foundation; either version 2 of the License, or (at
      15             :        your option) any later version.
      16             : 
      17             :    or both in parallel, as here.
      18             : 
      19             :    This program is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      22             :    GNU General Public License for more details.
      23             : 
      24             :    You should have received copies of the GNU General Public License and
      25             :    the GNU Lesser General Public License along with this program.  If
      26             :    not, see <http://www.gnu.org/licenses/>.
      27             : */
      28             : 
      29             : #include <config.h>
      30             : 
      31             : #include <stdlib.h>               /* free */
      32             : #include <errno.h>                /* errno */
      33             : 
      34             : #include "idn2.h"
      35             : #include "bidi.h"
      36             : #include "tables.h"
      37             : #include "context.h"
      38             : #include "tr46map.h"
      39             : 
      40             : #include <unitypes.h>
      41             : #include <unictype.h>             /* uc_is_general_category, UC_CATEGORY_M */
      42             : #include <uninorm.h>              /* u32_normalize */
      43             : #include <unistr.h>               /* u8_to_u32 */
      44             : 
      45             : #include "idna.h"
      46             : 
      47             : /*
      48             :  * NFC Quick Check from
      49             :  * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
      50             :  *
      51             :  * They say, this is much faster than 'brute force' normalization.
      52             :  * Strings are very likely already in NFC form.
      53             :  */
      54             : G_GNUC_IDN2_ATTRIBUTE_PURE static int
      55         958 : _isNFC (uint32_t *label, size_t len)
      56             : {
      57         958 :   int lastCanonicalClass = 0;
      58         958 :   int result = 1;
      59             :   size_t it;
      60             : 
      61        7876 :   for (it = 0; it < len; it++)
      62             :     {
      63        6974 :       uint32_t ch = label[it];
      64             : 
      65             :       // supplementary code point
      66        6974 :       if (ch >= 0x10000)
      67          53 :         it++;
      68             : 
      69        6974 :       int canonicalClass = uc_combining_class (ch);
      70        6974 :       if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
      71           0 :         return 0;
      72             : 
      73        6974 :       NFCQCMap *map = get_nfcqc_map (ch);
      74        6974 :       if (map)
      75             :         {
      76          56 :           if (map->check)
      77          56 :             return 0;
      78           0 :           result = -1;
      79             :         }
      80             : 
      81        6918 :       lastCanonicalClass = canonicalClass;
      82             :     }
      83             : 
      84         902 :   return result;
      85             : }
      86             : 
      87             : int
      88        6173 : _idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
      89             :                      uint32_t **out, size_t *outlen, int nfc)
      90             : {
      91             :   uint32_t *p;
      92             :   size_t plen;
      93             : 
      94        6173 :   p = u8_to_u32 (src, srclen, NULL, &plen);
      95        6173 :   if (p == NULL)
      96             :     {
      97           0 :       if (errno == ENOMEM)
      98           0 :         return IDN2_MALLOC;
      99           0 :       return IDN2_ENCODING_ERROR;
     100             :     }
     101             : 
     102        6173 :   if (nfc && !_isNFC (p, plen))
     103             :     {
     104             :       size_t tmplen;
     105          56 :       uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
     106          56 :       free (p);
     107          56 :       if (tmp == NULL)
     108             :         {
     109           0 :           if (errno == ENOMEM)
     110           0 :             return IDN2_MALLOC;
     111           0 :           return IDN2_NFC;
     112             :         }
     113             : 
     114          56 :       p = tmp;
     115          56 :       plen = tmplen;
     116             :     }
     117             : 
     118        6173 :   *out = p;
     119        6173 :   *outlen = plen;
     120        6173 :   return IDN2_OK;
     121             : }
     122             : 
     123             : bool
     124        9997 : _idn2_ascii_p (const uint8_t *src, size_t srclen)
     125             : {
     126             :   size_t i;
     127             : 
     128       44580 :   for (i = 0; i < srclen; i++)
     129       40641 :     if (src[i] >= 0x80)
     130        6058 :       return false;
     131             : 
     132        3939 :   return true;
     133             : }
     134             : 
     135             : int
     136       23229 : _idn2_label_test (int what, const uint32_t *label, size_t llen)
     137             : {
     138       23229 :   if (what & TEST_NFC)
     139             :     {
     140             :       size_t plen;
     141       23229 :       uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
     142             :                                    NULL, &plen);
     143             :       int ok;
     144       23229 :       if (p == NULL)
     145             :         {
     146           0 :           if (errno == ENOMEM)
     147         118 :             return IDN2_MALLOC;
     148           0 :           return IDN2_NFC;
     149             :         }
     150       23229 :       ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
     151       23229 :       free (p);
     152       23229 :       if (!ok)
     153         118 :         return IDN2_NOT_NFC;
     154             :     }
     155             : 
     156       23111 :   if (what & TEST_2HYPHEN)
     157             :     {
     158       23111 :       if (llen >= 4 && label[2] == '-' && label[3] == '-')
     159         501 :         return IDN2_2HYPHEN;
     160             :     }
     161             : 
     162       22610 :   if (what & TEST_HYPHEN_STARTEND)
     163             :     {
     164       18800 :       if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
     165        1451 :         return IDN2_HYPHEN_STARTEND;
     166             :     }
     167             : 
     168       21159 :   if (what & TEST_LEADING_COMBINING)
     169             :     {
     170       21159 :       if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
     171         284 :         return IDN2_LEADING_COMBINING;
     172             :     }
     173             : 
     174       20875 :   if (what & TEST_DISALLOWED)
     175             :     {
     176             :       size_t i;
     177       36711 :       for (i = 0; i < llen; i++)
     178       32664 :         if (_idn2_disallowed_p (label[i]))
     179             :           {
     180        3956 :             if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
     181        3934 :                 (what & TEST_ALLOW_STD3_DISALLOWED))
     182             :               {
     183             :                 IDNAMap map;
     184        3918 :                 get_idna_map (label[i], &map);
     185        3918 :                 if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
     186          59 :                     map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
     187        3871 :                   continue;
     188             : 
     189             :               }
     190             : 
     191          85 :             return IDN2_DISALLOWED;
     192             :           }
     193             :     }
     194             : 
     195       20790 :   if (what & TEST_CONTEXTJ)
     196             :     {
     197             :       size_t i;
     198           0 :       for (i = 0; i < llen; i++)
     199           0 :         if (_idn2_contextj_p (label[i]))
     200           0 :           return IDN2_CONTEXTJ;
     201             :     }
     202             : 
     203       20790 :   if (what & TEST_CONTEXTJ_RULE)
     204             :     {
     205             :       size_t i;
     206             :       int rc;
     207             : 
     208       33487 :       for (i = 0; i < llen; i++)
     209             :         {
     210       29750 :           rc = _idn2_contextj_rule (label, llen, i);
     211       29750 :           if (rc != IDN2_OK)
     212         310 :             return rc;
     213             :         }
     214             :     }
     215             : 
     216       20480 :   if (what & TEST_CONTEXTO)
     217             :     {
     218             :       size_t i;
     219           0 :       for (i = 0; i < llen; i++)
     220           0 :         if (_idn2_contexto_p (label[i]))
     221           0 :           return IDN2_CONTEXTO;
     222             :     }
     223             : 
     224       20480 :   if (what & TEST_CONTEXTO_WITH_RULE)
     225             :     {
     226             :       size_t i;
     227       26588 :       for (i = 0; i < llen; i++)
     228       23140 :         if (_idn2_contexto_p (label[i])
     229        6269 :             && !_idn2_contexto_with_rule (label[i]))
     230           0 :           return IDN2_CONTEXTO_NO_RULE;
     231             :     }
     232             : 
     233       20480 :   if (what & TEST_CONTEXTO_RULE)
     234             :     {
     235             :       size_t i;
     236             :       int rc;
     237             : 
     238        4955 :       for (i = 0; i < llen; i++)
     239             :         {
     240        4695 :           rc = _idn2_contexto_rule (label, llen, i);
     241        4695 :           if (rc != IDN2_OK)
     242          29 :             return rc;
     243             :         }
     244             :     }
     245             : 
     246       20451 :   if (what & TEST_UNASSIGNED)
     247             :     {
     248             :       size_t i;
     249       30718 :       for (i = 0; i < llen; i++)
     250       27032 :         if (_idn2_unassigned_p (label[i]))
     251          22 :           return IDN2_UNASSIGNED;
     252             :     }
     253             : 
     254       20429 :   if (what & TEST_BIDI)
     255             :     {
     256        3686 :       int rc = _idn2_bidi (label, llen);
     257        3686 :       if (rc != IDN2_OK)
     258         659 :         return rc;
     259             :     }
     260             : 
     261       19770 :   if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
     262             :     {
     263             :       size_t i;
     264       19664 :       int transitional = what & TEST_TRANSITIONAL;
     265             : 
     266             :       /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
     267      105775 :       for (i = 0; i < llen; i++)
     268       86111 :         if (label[i] == 0x002E)
     269           0 :           return IDN2_DOT_IN_LABEL;
     270             : 
     271             :       /* TR46: 6. Each code point in the label must only have certain status
     272             :        * values according to Section 5, IDNA Mapping Table:
     273             :        *    a. For Transitional Processing, each value must be valid.
     274             :        *    b. For Nontransitional Processing, each value must be either valid or deviation. */
     275      103856 :       for (i = 0; i < llen; i++)
     276             :         {
     277             :           IDNAMap map;
     278             : 
     279       84628 :           get_idna_map (label[i], &map);
     280             : 
     281       84628 :           if (map_is (&map, TR46_FLG_VALID) ||
     282       16490 :               (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
     283       84192 :             continue;
     284             : 
     285       16523 :           if (what & TEST_ALLOW_STD3_DISALLOWED &&
     286       16349 :               (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
     287         356 :                map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
     288       16087 :             continue;
     289             : 
     290         436 :           return transitional ? IDN2_INVALID_TRANSITIONAL :
     291             :             IDN2_INVALID_NONTRANSITIONAL;
     292             :         }
     293             :     }
     294             : 
     295       19334 :   return IDN2_OK;
     296             : }

Generated by: LCOV version 1.16