LCOV - code coverage report
Current view: top level - builds/libidn/libidn2/lib - idna.c (source / functions) Hit Total Coverage
Test: Libidn2-2.3.7.3-61ee Code Coverage Lines: 101 117 86.3 %
Date: 2024-12-01 07:08:19 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* idna.c - implementation of high-level IDNA processing function
       2             :    Copyright (C) 2011-2024 Simon Josefsson
       3             : 
       4             :    Libidn2 is free software: you can redistribute it and/or modify it
       5             :    under the terms of either:
       6             : 
       7             :      * the GNU Lesser General Public License as published by the Free
       8             :        Software Foundation; either version 3 of the License, or (at
       9             :        your option) any later version.
      10             : 
      11             :    or
      12             : 
      13             :      * the GNU General Public License as published by the Free
      14             :        Software Foundation; either version 2 of the License, or (at
      15             :        your option) any later version.
      16             : 
      17             :    or both in parallel, as here.
      18             : 
      19             :    This program is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      22             :    GNU General Public License for more details.
      23             : 
      24             :    You should have received copies of the GNU General Public License and
      25             :    the GNU Lesser General Public License along with this program.  If
      26             :    not, see <http://www.gnu.org/licenses/>.
      27             : */
      28             : 
      29             : #include <config.h>
      30             : 
      31             : #include <stdlib.h>               /* free */
      32             : #include <errno.h>                /* errno */
      33             : 
      34             : #include "idn2.h"
      35             : #include "bidi.h"
      36             : #include "tables.h"
      37             : #include "context.h"
      38             : #include "tr46map.h"
      39             : 
      40             : #include <unitypes.h>
      41             : #include <unictype.h>             /* uc_is_general_category, UC_CATEGORY_M */
      42             : #include <uninorm.h>              /* u32_normalize */
      43             : #include <unistr.h>               /* u8_to_u32 */
      44             : 
      45             : #include "idna.h"
      46             : 
      47             : /*
      48             :  * NFC Quick Check from
      49             :  * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms
      50             :  *
      51             :  * They say, this is much faster than 'brute force' normalization.
      52             :  * Strings are very likely already in NFC form.
      53             :  */
      54             : G_GNUC_IDN2_ATTRIBUTE_PURE static int
      55      145983 : _isNFC (uint32_t *label, size_t len)
      56             : {
      57      145983 :   int lastCanonicalClass = 0;
      58      145983 :   int result = 1;
      59             :   size_t it;
      60             : 
      61      299607 :   for (it = 0; it < len; it++)
      62             :     {
      63      153828 :       uint32_t ch = label[it];
      64             : 
      65             :       // supplementary code point
      66      153828 :       if (ch >= 0x10000)
      67       90614 :         it++;
      68             : 
      69      153828 :       int canonicalClass = uc_combining_class (ch);
      70      153828 :       if (lastCanonicalClass > canonicalClass && canonicalClass != 0)
      71           0 :         return 0;
      72             : 
      73      153828 :       NFCQCMap *map = get_nfcqc_map (ch);
      74      153828 :       if (map)
      75             :         {
      76         204 :           if (map->check)
      77         204 :             return 0;
      78           0 :           result = -1;
      79             :         }
      80             : 
      81      153624 :       lastCanonicalClass = canonicalClass;
      82             :     }
      83             : 
      84      145779 :   return result;
      85             : }
      86             : 
      87             : int
      88     1703089 : _idn2_u8_to_u32_nfc (const uint8_t *src, size_t srclen,
      89             :                      uint32_t **out, size_t *outlen, int nfc)
      90             : {
      91             :   uint32_t *p;
      92             :   size_t plen;
      93             : 
      94     1703089 :   p = u8_to_u32 (src, srclen, NULL, &plen);
      95     1703089 :   if (p == NULL)
      96             :     {
      97           1 :       if (errno == ENOMEM)
      98           0 :         return IDN2_MALLOC;
      99           1 :       return IDN2_ENCODING_ERROR;
     100             :     }
     101             : 
     102     1703088 :   if (nfc && !_isNFC (p, plen))
     103             :     {
     104             :       size_t tmplen;
     105         204 :       uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen);
     106         204 :       free (p);
     107         204 :       if (tmp == NULL)
     108             :         {
     109           0 :           if (errno == ENOMEM)
     110           0 :             return IDN2_MALLOC;
     111           0 :           return IDN2_NFC;
     112             :         }
     113             : 
     114         204 :       p = tmp;
     115         204 :       plen = tmplen;
     116             :     }
     117             : 
     118     1703088 :   *out = p;
     119     1703088 :   *outlen = plen;
     120     1703088 :   return IDN2_OK;
     121             : }
     122             : 
     123             : bool
     124     1715441 : _idn2_ascii_p (const uint8_t *src, size_t srclen)
     125             : {
     126             :   size_t i;
     127             : 
     128     1788920 :   for (i = 0; i < srclen; i++)
     129     1775168 :     if (src[i] >= 0x80)
     130     1701689 :       return false;
     131             : 
     132       13752 :   return true;
     133             : }
     134             : 
     135             : int
     136     2183756 : _idn2_label_test (int what, const uint32_t *label, size_t llen)
     137             : {
     138     2183756 :   if (what & TEST_NFC)
     139             :     {
     140             :       size_t plen;
     141     2183756 :       uint32_t *p = u32_normalize (UNINORM_NFC, label, llen,
     142             :                                    NULL, &plen);
     143             :       int ok;
     144     2183756 :       if (p == NULL)
     145             :         {
     146           0 :           if (errno == ENOMEM)
     147        1264 :             return IDN2_MALLOC;
     148           0 :           return IDN2_NFC;
     149             :         }
     150     2183756 :       ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0;
     151     2183756 :       free (p);
     152     2183756 :       if (!ok)
     153        1264 :         return IDN2_NOT_NFC;
     154             :     }
     155             : 
     156     2182492 :   if (what & TEST_2HYPHEN)
     157             :     {
     158     2182492 :       if (llen >= 4 && label[2] == '-' && label[3] == '-')
     159         583 :         return IDN2_2HYPHEN;
     160             :     }
     161             : 
     162     2181909 :   if (what & TEST_HYPHEN_STARTEND)
     163             :     {
     164      629636 :       if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-'))
     165        2729 :         return IDN2_HYPHEN_STARTEND;
     166             :     }
     167             : 
     168     2179180 :   if (what & TEST_LEADING_COMBINING)
     169             :     {
     170     2179180 :       if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M))
     171       13461 :         return IDN2_LEADING_COMBINING;
     172             :     }
     173             : 
     174     2165719 :   if (what & TEST_DISALLOWED)
     175             :     {
     176             :       size_t i;
     177     2964718 :       for (i = 0; i < llen; i++)
     178     1593941 :         if (_idn2_disallowed_p (label[i]))
     179             :           {
     180      184063 :             if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) &&
     181       32259 :                 (what & TEST_ALLOW_STD3_DISALLOWED))
     182             :               {
     183             :                 IDNAMap map;
     184       32243 :                 get_idna_map (label[i], &map);
     185       32243 :                 if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
     186       27613 :                     map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))
     187        4643 :                   continue;
     188             : 
     189             :               }
     190             : 
     191      179420 :             return IDN2_DISALLOWED;
     192             :           }
     193             :     }
     194             : 
     195     1986299 :   if (what & TEST_CONTEXTJ)
     196             :     {
     197             :       size_t i;
     198           0 :       for (i = 0; i < llen; i++)
     199           0 :         if (_idn2_contextj_p (label[i]))
     200           0 :           return IDN2_CONTEXTJ;
     201             :     }
     202             : 
     203     1986299 :   if (what & TEST_CONTEXTJ_RULE)
     204             :     {
     205             :       size_t i;
     206             :       int rc;
     207             : 
     208     2780065 :       for (i = 0; i < llen; i++)
     209             :         {
     210     1409907 :           rc = _idn2_contextj_rule (label, llen, i);
     211     1409907 :           if (rc != IDN2_OK)
     212         619 :             return rc;
     213             :         }
     214             :     }
     215             : 
     216     1985680 :   if (what & TEST_CONTEXTO)
     217             :     {
     218             :       size_t i;
     219           0 :       for (i = 0; i < llen; i++)
     220           0 :         if (_idn2_contexto_p (label[i]))
     221           0 :           return IDN2_CONTEXTO;
     222             :     }
     223             : 
     224     1985680 :   if (what & TEST_CONTEXTO_WITH_RULE)
     225             :     {
     226             :       size_t i;
     227     2772353 :       for (i = 0; i < llen; i++)
     228     1402529 :         if (_idn2_contexto_p (label[i])
     229        6468 :             && !_idn2_contexto_with_rule (label[i]))
     230           0 :           return IDN2_CONTEXTO_NO_RULE;
     231             :     }
     232             : 
     233     1985680 :   if (what & TEST_CONTEXTO_RULE)
     234             :     {
     235             :       size_t i;
     236             :       int rc;
     237             : 
     238        5201 :       for (i = 0; i < llen; i++)
     239             :         {
     240        4918 :           rc = _idn2_contexto_rule (label, llen, i);
     241        4918 :           if (rc != IDN2_OK)
     242          51 :             return rc;
     243             :         }
     244             :     }
     245             : 
     246     1985629 :   if (what & TEST_UNASSIGNED)
     247             :     {
     248             :       size_t i;
     249     1951396 :       for (i = 0; i < llen; i++)
     250     1406596 :         if (_idn2_unassigned_p (label[i]))
     251      825307 :           return IDN2_UNASSIGNED;
     252             :     }
     253             : 
     254     1160322 :   if (what & TEST_BIDI)
     255             :     {
     256      544800 :       int rc = _idn2_bidi (label, llen);
     257      544800 :       if (rc != IDN2_OK)
     258        1059 :         return rc;
     259             :     }
     260             : 
     261     1159263 :   if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL))
     262             :     {
     263             :       size_t i;
     264     1027568 :       int transitional = what & TEST_TRANSITIONAL;
     265             : 
     266             :       /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */
     267     2180849 :       for (i = 0; i < llen; i++)
     268     1153281 :         if (label[i] == 0x002E)
     269           0 :           return IDN2_DOT_IN_LABEL;
     270             : 
     271             :       /* TR46: 6. Each code point in the label must only have certain status
     272             :        * values according to Section 5, IDNA Mapping Table:
     273             :        *    a. For Transitional Processing, each value must be valid.
     274             :        *    b. For Nontransitional Processing, each value must be either valid or deviation. */
     275     2172362 :       for (i = 0; i < llen; i++)
     276             :         {
     277             :           IDNAMap map;
     278             : 
     279     1148290 :           get_idna_map (label[i], &map);
     280             : 
     281     1148290 :           if (map_is (&map, TR46_FLG_VALID) ||
     282       24917 :               (!transitional && map_is (&map, TR46_FLG_DEVIATION)))
     283     1144794 :             continue;
     284             : 
     285       23407 :           if (what & TEST_ALLOW_STD3_DISALLOWED &&
     286       23233 :               (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) ||
     287        3458 :                map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)))
     288       19911 :             continue;
     289             : 
     290        3496 :           return transitional ? IDN2_INVALID_TRANSITIONAL :
     291             :             IDN2_INVALID_NONTRANSITIONAL;
     292             :         }
     293             :     }
     294             : 
     295     1155767 :   return IDN2_OK;
     296             : }

Generated by: LCOV version 1.16