GNU Libidn 1.33

const char *

Appendices

Indices

Simon Josefsson Datakonsult AB
Hagagatan 24
113 47 Stockholm
Sweden

E-mail: simon@josefsson.org

$ wget -q ftp://alpha.gnu.org/pub/gnu/libidn/libidn-1.33.tar.gz
$ tar xfz libidn-1.33.tar.gz
$ cd libidn-1.33/
$ ./configure
...
$ make
...
$ make install
...

#include <stringprep.h>

  if (!stringprep_check_version (STRINGPREP_VERSION))
    {
      printf ("stringprep_check_version() failed:\n"
              "Header file incompatible with shared library.\n");
      exit(EXIT_FAILURE);
    }

gcc -c foo.c `pkg-config libidn --cflags`

gcc -o foo foo.o `pkg-config libidn --libs`

gcc -o foo foo.c `pkg-config libidn --cflags --libs`

# Detect Libidn by hand: --with-libidn[=DIR] optionally adds DIR/lib and
# DIR/include to the search paths, then idna.h and the symbol
# stringprep_check_version in -lidn are probed.  LIBIDN is defined and
# -lidn appended to LIBS only when both checks succeed.
AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]],
                                [Support IDN (needs GNU Libidn)]),
  libidn=$withval, libidn=yes)
if test "$libidn" != "no"; then
  if test "$libidn" != "yes"; then
    LDFLAGS="${LDFLAGS} -L$libidn/lib"
    CPPFLAGS="${CPPFLAGS} -I$libidn/include"
  fi
  AC_CHECK_HEADER(idna.h,
    AC_CHECK_LIB(idn, stringprep_check_version,
      [libidn=yes LIBS="${LIBS} -lidn"], libidn=no),
    libidn=no)
fi
if test "$libidn" != "no" ; then
  AC_DEFINE(LIBIDN, 1, [Define to 1 if you want IDN support.])
else
  AC_MSG_WARN([Libidn not found])
fi
AC_MSG_CHECKING([if Libidn should be used])
AC_MSG_RESULT($libidn)

# Detect Libidn through pkg-config: unless --without-libidn was given,
# PKG_CHECK_MODULES looks up the "libidn" module; LIBIDN is defined when
# it is found, otherwise a warning is issued and libidn is set to "no".
AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]],
                                [Support IDN (needs GNU Libidn)]),
  libidn=$withval, libidn=yes)
if test "$libidn" != "no" ; then
  PKG_CHECK_MODULES(LIBIDN, libidn >= 0.0.0, [libidn=yes], [libidn=no])
  if test "$libidn" != "yes" ; then
    libidn=no
    AC_MSG_WARN([Libidn not found])
  else
    libidn=yes
    AC_DEFINE(LIBIDN, 1, [Define to 1 if you want Libidn.])
  fi
fi
AC_MSG_CHECKING([if Libidn should be used])
AC_MSG_RESULT($libidn)

#include <idn-free.h>

#include <stringprep.h>

#include <stringprep.h>

#include <punycode.h>

#include <idna.h>

#include <tld.h>

#include <pr29.h>

/* example.c --- Example code showing how to use stringprep().
 * Copyright (C) 2002-2016 Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>		/* setlocale() */
#include <stringprep.h>

/*
 * Compiling using libtool and pkg-config is recommended:
 *
 * $ libtool cc -o example example.c `pkg-config --cflags --libs libidn`
 * $ ./example
 * Input string encoded as `ISO-8859-1': ª
 * Before locale2utf8 (length 2): aa 0a
 * Before stringprep (length 3): c2 aa 0a
 * After stringprep (length 2): 61 0a
 * $
 *
 */

/* Read one line from stdin, convert it from the locale charset to
 * UTF-8, run it through stringprep() with the stringprep_nameprep
 * profile, and print a hex dump of the string at each stage.
 *
 * Returns 0 on success and EXIT_FAILURE when no input could be read,
 * when the UTF-8 form does not fit in the work buffer, or when
 * stringprep() fails.
 */
int
main (void)
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  setlocale (LC_ALL, "");

  printf ("Input string encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);
  if (!fgets (buf, BUFSIZ, stdin))
    {
      /* buf is indeterminate after a failed read; stop here instead
         of calling strlen() on it.  */
      perror ("fgets");
      return EXIT_FAILURE;
    }
  /* Drop the trailing newline only if there is one.  Unlike
     buf[strlen (buf) - 1], this is safe for an empty string and does
     not eat the last character of an unterminated final line.  */
  buf[strcspn (buf, "\n")] = '\0';

  len = strlen (buf);
  printf ("Before locale2utf8 (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  p = stringprep_locale_to_utf8 (buf);
  if (p)
    {
      /* The UTF-8 encoding may be longer than the locale encoding,
         so check that it fits before copying it back into buf.  */
      if (strlen (p) >= sizeof buf)
        {
          printf ("UTF-8 string too long for buffer\n");
          free (p);
          return EXIT_FAILURE;
        }
      strcpy (buf, p);
      free (p);
    }
  else
    printf ("Could not convert string to UTF-8, continuing anyway...\n");

  len = strlen (buf);
  printf ("Before stringprep (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  /* stringprep() rewrites buf in place; BUFSIZ is its capacity.  */
  rc = stringprep (buf, BUFSIZ, 0, stringprep_nameprep);
  if (rc != STRINGPREP_OK)
    {
      printf ("Stringprep failed (%d): %s\n", rc, stringprep_strerror (rc));
      return EXIT_FAILURE;
    }

  len = strlen (buf);
  printf ("After stringprep (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  return 0;
}

/* example2.c --- Example code showing how to use punycode.
 * Copyright (C) 2002-2016 Simon Josefsson
 * Copyright (C) 2002  Adam M. Costello
 *
 * This file is part of GNU Libidn.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <locale.h>		/* setlocale() */

/*
 * This file is derived from RFC 3492 written by Adam M. Costello.
 *
 * Disclaimer and license: Regarding this entire document or any
 * portion of it (including the pseudocode and C code), the author
 * makes no guarantees and is not responsible for any damage resulting
 * from its use.  The author grants irrevocable permission to anyone
 * to use, modify, and distribute it in any way that does not diminish
 * the rights of anyone else to use, modify, and distribute it,
 * provided that redistributed derivative works do not contain
 * misleading author or version information.  Derivative works need
 * not be licensed under similar terms.
 *
 */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <punycode.h>

/* For testing, we'll just set some compile-time limits rather than */
/* use malloc(), and set a compile-time option rather than using a  */
/* command-line option.                                             */

enum
{
  /* Number of elements in the code point arrays and in the
     case_flags array declared in main().  */
  unicode_max_length = 256,
  /* Maximum number of Punycode characters; main() sizes its char
     buffers as ace_max_length plus room for terminators.  */
  ace_max_length = 256
};

/* Write the command-line synopsis to stderr, using argv[0] as the
 * program name, and terminate the process with EXIT_FAILURE.
 * This function does not return.
 */
static void
usage (char **argv)
{
  const char *prog = argv[0];

  fputs ("\n", stderr);
  fprintf (stderr,
           "%s -e reads code points and writes a Punycode string.\n", prog);
  fprintf (stderr,
           "%s -d reads a Punycode string and writes code points.\n", prog);
  fputs ("\n"
         "Input and output are plain text in the native character set.\n"
         "Code points are in the form u+hex separated by whitespace.\n"
         "Although the specification allows Punycode strings to contain\n"
         "any characters from the ASCII repertoire, this test code\n"
         "supports only the printable characters, and needs the Punycode\n"
         "string to be followed by a newline.\n"
         "The case of the u in u+hex is the force-to-uppercase flag.\n",
         stderr);
  exit (EXIT_FAILURE);
}

/* Print the diagnostic MSG verbatim on stderr (the caller supplies
 * any trailing newline) and terminate with EXIT_FAILURE.
 * This function does not return.
 */
static void
fail (const char *msg)
{
  fprintf (stderr, "%s", msg);
  exit (EXIT_FAILURE);
}

/* Diagnostic messages handed to fail(); each one already ends with
   a newline because fail() prints its argument verbatim.  */
static const char too_big[] =
  "input or output is too large, recompile with larger limits\n";
static const char invalid_input[] = "invalid input\n";
static const char overflow[] = "arithmetic overflow\n";
static const char io_error[] = "I/O error\n";

/* The following string is used to convert printable */
/* characters between ASCII and the native charset:  */
/*                                                   */
/* Entry i is the native character for ASCII code i, */
/* so the table must hold exactly 128 characters.    */
/* Non-printable codes are represented by '\n'.      */
/* The at sign is written literally: the previous    */
/* spelling "\0x40" is a NUL followed by "x40",      */
/* which shifted every later entry by three and made */
/* strchr() stop before reaching the letters.        */

static const char print_ascii[] =
  "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  " !\"#$%&'()*+,-./"
  "0123456789:;<=>?"
  "@ABCDEFGHIJKLMNO"
  "PQRSTUVWXYZ[\\]^_"
  "`abcdefghijklmno"
  "pqrstuvwxyz{|}~\n";

/* Punycode test driver.
 *
 * With "-e", reads code points of the form u+hex / U+hex from stdin,
 * encodes them with punycode_encode() and prints the resulting string.
 * With "-d", reads one newline-terminated Punycode string from stdin,
 * decodes it with punycode_decode() and prints one code point per line.
 *
 * Any other command line prints the usage text.  All error paths go
 * through usage() or fail(), which exit with EXIT_FAILURE; on success
 * EXIT_SUCCESS is returned.
 */
int
main (int argc, char **argv)
{
  enum punycode_status status;
  int r;
  size_t input_length, output_length, j;
  unsigned char case_flags[unicode_max_length];

  setlocale (LC_ALL, "");

  if (argc != 2)
    usage (argv);
  /* Accept exactly two characters, "-e" or "-d".  Checking the length
     first avoids reading argv[1][2] beyond the terminator when the
     argument is just "-".  */
  if (strlen (argv[1]) != 2 || argv[1][0] != '-')
    usage (argv);

  if (argv[1][1] == 'e')
    {
      uint32_t input[unicode_max_length];
      unsigned long codept;
      char output[ace_max_length + 1], uplus[3];
      int c;

      /* Read the input code points: */

      input_length = 0;

      for (;;)
        {
          r = scanf ("%2s%lx", uplus, &codept);
          if (ferror (stdin))
            fail (io_error);
          if (r == EOF || r == 0)
            break;

          if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1)
            {
              fail (invalid_input);
            }

          if (input_length == unicode_max_length)
            fail (too_big);

          /* Lower-case 'u' clears the case flag, upper-case 'U' sets it. */
          if (uplus[0] == 'u')
            case_flags[input_length] = 0;
          else if (uplus[0] == 'U')
            case_flags[input_length] = 1;
          else
            fail (invalid_input);

          input[input_length++] = codept;
        }

      /* Encode: */

      output_length = ace_max_length;
      status = punycode_encode (input_length, input, case_flags,
                                &output_length, output);
      if (status == punycode_bad_input)
        fail (invalid_input);
      if (status == punycode_big_output)
        fail (too_big);
      if (status == punycode_overflow)
        fail (overflow);
      assert (status == punycode_success);

      /* Convert to native charset and output: */

      for (j = 0; j < output_length; ++j)
        {
          c = output[j];
          assert (c >= 0 && c <= 127);
          if (print_ascii[c] == 0)
            fail (invalid_input);
          output[j] = print_ascii[c];
        }

      /* output has ace_max_length + 1 elements, so the terminator
         fits even when output_length == ace_max_length.  */
      output[j] = 0;
      r = puts (output);
      if (r == EOF)
        fail (io_error);
      return EXIT_SUCCESS;
    }

  if (argv[1][1] == 'd')
    {
      char input[ace_max_length + 2], *p, *pp;
      uint32_t output[unicode_max_length];

      /* Read the Punycode input string and convert to ASCII: */

      if (!fgets (input, ace_max_length + 2, stdin))
        fail (io_error);
      if (ferror (stdin))
        fail (io_error);
      if (feof (stdin))
        fail (invalid_input);
      input_length = strlen (input);
      /* A line that begins with a NUL byte yields an empty string;
         reject it before computing input_length - 1, which would
         otherwise wrap around.  */
      if (input_length == 0)
        fail (invalid_input);
      input_length--;
      /* No newline at the end means the line did not fit.  */
      if (input[input_length] != '\n')
        fail (too_big);
      input[input_length] = 0;

      for (p = input; *p != 0; ++p)
        {
          /* The index of the character in print_ascii is its ASCII code. */
          pp = strchr (print_ascii, *p);
          if (pp == 0)
            fail (invalid_input);
          *p = pp - print_ascii;
        }

      /* Decode: */

      output_length = unicode_max_length;
      status = punycode_decode (input_length, input, &output_length,
                                output, case_flags);
      if (status == punycode_bad_input)
        fail (invalid_input);
      if (status == punycode_big_output)
        fail (too_big);
      if (status == punycode_overflow)
        fail (overflow);
      assert (status == punycode_success);

      /* Output the result: */

      for (j = 0; j < output_length; ++j)
        {
          r = printf ("%s+%04lX\n",
                      case_flags[j] ? "U" : "u", (unsigned long) output[j]);
          if (r < 0)
            fail (io_error);
        }

      return EXIT_SUCCESS;
    }

  usage (argv);
  return EXIT_SUCCESS;		/* not reached, but quiets compiler warning */
}

/* example3.c --- Example ToASCII() code showing how to use Libidn.
 * Copyright (C) 2002-2016 Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>		/* setlocale() */
#include <stringprep.h>		/* stringprep_locale_charset() */
#include <idna.h>		/* idna_to_ascii_lz() */

/*
 * Compiling using libtool and pkg-config is recommended:
 *
 * $ libtool cc -o example3 example3.c `pkg-config --cflags --libs libidn`
 * $ ./example3
 * Input domain encoded as `ISO-8859-1': www.räksmörgåsª.example
 * Read string (length 23): 77 77 77 2e 72 e4 6b 73 6d f6 72 67 e5 73 aa 2e 65 78 61 6d 70 6c 65
 * ACE label (length 33): 'www.xn--rksmrgsa-0zap8p.example'
 * 77 77 77 2e 78 6e 2d 2d 72 6b 73 6d 72 67 73 61 2d 30 7a 61 70 38 70 2e 65 78 61 6d 70 6c 65
 * $
 *
 */

/* Read one domain name from stdin in the locale charset, convert it
 * with idna_to_ascii_lz() and print the result both as text and as a
 * hex dump.
 *
 * Returns 0 on success and EXIT_FAILURE when no input could be read
 * or the conversion fails.
 */
int
main (void)
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  setlocale (LC_ALL, "");

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);
  if (!fgets (buf, BUFSIZ, stdin))
    {
      /* buf is indeterminate after a failed read; stop here instead
         of calling strlen() on it.  */
      perror ("fgets");
      return EXIT_FAILURE;
    }
  /* Drop the trailing newline only if there is one; safe for an
     empty string and for a final line without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  len = strlen (buf);
  printf ("Read string (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  rc = idna_to_ascii_lz (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("ToASCII() failed (%d): %s\n", rc, idna_strerror (rc));
      return EXIT_FAILURE;
    }

  len = strlen (p);
  printf ("ACE label (length %ld): '%s'\n", (long int) len, p);
  for (i = 0; i < len; i++)
    printf ("%02x ", p[i] & 0xFF);
  printf ("\n");

  /* p was handed back through the output argument and is released
     here once it has been printed.  */
  free (p);

  return 0;
}

/* example4.c --- Example ToUnicode() code showing how to use Libidn.
 * Copyright (C) 2002-2016 Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>		/* setlocale() */
#include <stringprep.h>		/* stringprep_locale_charset() */
#include <idna.h>		/* idna_to_unicode_lzlz() */

/*
 * Compiling using libtool and pkg-config is recommended:
 *
 * $ libtool cc -o example4 example4.c `pkg-config --cflags --libs libidn`
 * $ ./example4
 * Input domain encoded as `ISO-8859-1': www.xn--rksmrgsa-0zap8p.example
 * Read string (length 33): 77 77 77 2e 78 6e 2d 2d 72 6b 73 6d 72 67 73 61 2d 30 7a 61 70 38 70 2e 65 78 61 6d 70 6c 65
 * ACE label (length 23): 'www.räksmörgåsa.example'
 * 77 77 77 2e 72 e4 6b 73 6d f6 72 67 e5 73 61 2e 65 78 61 6d 70 6c 65
 * $
 *
 */

/* Read one domain name from stdin, convert it with
 * idna_to_unicode_lzlz() and print the result both as text and as a
 * hex dump.
 *
 * Returns 0 on success and EXIT_FAILURE when no input could be read
 * or the conversion fails.
 */
int
main (void)
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  setlocale (LC_ALL, "");

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);
  if (!fgets (buf, BUFSIZ, stdin))
    {
      /* buf is indeterminate after a failed read; stop here instead
         of calling strlen() on it.  */
      perror ("fgets");
      return EXIT_FAILURE;
    }
  /* Drop the trailing newline only if there is one; safe for an
     empty string and for a final line without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  len = strlen (buf);
  printf ("Read string (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  rc = idna_to_unicode_lzlz (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("ToUnicode() failed (%d): %s\n", rc, idna_strerror (rc));
      return EXIT_FAILURE;
    }

  /* NOTE(review): the text says "ACE label" although p holds the
     ToUnicode result; the wording is kept so the output still matches
     the sample run shown in this file's header comment.  */
  len = strlen (p);
  printf ("ACE label (length %ld): '%s'\n", (long int) len, p);
  for (i = 0; i < len; i++)
    printf ("%02x ", p[i] & 0xFF);
  printf ("\n");

  free (p);

  return 0;
}

/* example5.c --- Example TLD checking.
 * Copyright (C) 2004-2016 Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Get stringprep_locale_charset, etc. */
#include <stringprep.h>

/* Get idna_to_ascii_8z, etc. */
#include <idna.h>

/* Get tld_check_4z. */
#include <tld.h>

/*
 * Compiling using libtool and pkg-config is recommended:
 *
 * $ libtool cc -o example5 example5.c `pkg-config --cflags --libs libidn`
 * $ ./example5
 * Input domain encoded as `UTF-8': fooß.no
 * Read string (length 8): 66 6f 6f c3 9f 2e 6e 6f
 * ToASCII string (length 8): fooss.no
 * ToUnicode string: U+0066 U+006f U+006f U+0073 U+0073 U+002e U+006e U+006f
 * Domain accepted by TLD check
 *
 * $ ./example5
 * Input domain encoded as `UTF-8': gr€€n.no
 * Read string (length 12): 67 72 e2 82 ac e2 82 ac 6e 2e 6e 6f
 * ToASCII string (length 16): xn--grn-l50aa.no
 * ToUnicode string: U+0067 U+0072 U+20ac U+20ac U+006e U+002e U+006e U+006f
 * Domain rejected by TLD check, Unicode position 2
 *
 */

/* Read one domain name from stdin, convert it to UTF-8, run it
 * through idna_to_ascii_8z() and idna_to_unicode_8z4z(), and finally
 * check the resulting code points with tld_check_4z().
 *
 * Exit status: 0 when the domain is accepted, 1 when tld_check_4z()
 * reports TLD_INVALID, 2 on any other failure (no input, string too
 * long for the work buffer, or a library call failing).
 */
int
main (void)
{
  char buf[BUFSIZ];
  char *p;
  uint32_t *r;
  int rc;
  size_t errpos, i, len;

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);
  if (!fgets (buf, BUFSIZ, stdin))
    {
      /* buf is indeterminate after a failed read; stop here instead
         of calling strlen() on it.  */
      perror ("fgets");
      return 2;
    }
  /* Drop the trailing newline only if there is one; safe for an
     empty string and for a final line without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  len = strlen (buf);
  printf ("Read string (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  p = stringprep_locale_to_utf8 (buf);
  if (p)
    {
      /* The UTF-8 encoding may be longer than the locale encoding,
         so check that it fits before copying it back into buf.  */
      if (strlen (p) >= sizeof buf)
        {
          printf ("UTF-8 string too long for buffer\n");
          free (p);
          return 2;
        }
      strcpy (buf, p);
      free (p);
    }
  else
    printf ("Could not convert string to UTF-8, continuing anyway...\n");

  rc = idna_to_ascii_8z (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("idna_to_ascii_8z failed (%d): %s\n", rc, idna_strerror (rc));
      return 2;
    }

  printf ("ToASCII string (length %ld): %s\n", (long int) strlen (p), p);

  rc = idna_to_unicode_8z4z (p, &r, 0);
  /* The ASCII form is no longer needed whether or not the call
     succeeded, so release it before looking at rc.  */
  free (p);
  if (rc != IDNA_SUCCESS)
    {
      printf ("idna_to_unicode_8z4z failed (%d): %s\n",
              rc, idna_strerror (rc));
      return 2;
    }

  /* r is walked up to its zero terminator.  The cast makes the
     argument type match the conversion specifier regardless of how
     uint32_t is defined.  */
  printf ("ToUnicode string: ");
  for (i = 0; r[i]; i++)
    printf ("U+%04lx ", (unsigned long) r[i]);
  printf ("\n");

  rc = tld_check_4z (r, &errpos, NULL);
  free (r);
  if (rc == TLD_INVALID)
    {
      printf ("Domain rejected by TLD check, Unicode position %ld\n", (long int) errpos);
      return 1;
    }
  else if (rc != TLD_SUCCESS)
    {
      printf ("tld_check_4z() failed (%d): %s\n", rc, tld_strerror (rc));
      return 2;
    }

  printf ("Domain accepted by TLD check\n");

  return 0;
}

  -h, --help               Print help and exit

  -V, --version            Print version and exit

  -s, --stringprep         Prepare string according to nameprep profile

  -d, --punycode-decode    Decode Punycode

  -e, --punycode-encode    Encode Punycode

  -a, --idna-to-ascii      Convert to ACE according to IDNA (default mode)

  -u, --idna-to-unicode    Convert from ACE according to IDNA

      --allow-unassigned   Toggle IDNA AllowUnassigned flag (default off)

      --usestd3asciirules  Toggle IDNA UseSTD3ASCIIRules flag (default off)

      --no-tld             Don't check string for TLD specific rules
                             Only for --idna-to-ascii and --idna-to-unicode

  -n, --nfkc               Normalize string according to Unicode v3.2 NFKC

  -p, --profile=STRING     Use specified stringprep profile instead
                             Valid stringprep profiles: `Nameprep',
                             `iSCSI', `Nodeprep', `Resourceprep',
                             `trace', `SASLprep'

      --debug              Print debugging information

      --quiet              Silent operation

$ CHARSET=ISO-8859-1 idn --punycode-encode
...

jas@latte:~$ idn
libidn 0.3.5
Copyright 2002, 2003 Simon Josefsson.
GNU Libidn comes with NO WARRANTY, to the extent permitted by law.
You may redistribute copies of GNU Libidn under the terms of
the GNU Lesser General Public License.  For more information
about these matters, see the file named COPYING.LIB.
Type each input string on a line by itself, terminated by a newline character.
räksmörgås.se
xn--rksmrgs-5wao1o.se
jas@latte:~$

jas@latte:~$ idn --quiet räksmörgås.se blåbærgrød.no
xn--rksmrgs-5wao1o.se
xn--blbrgrd-fxak7p.no
jas@latte:~$

jas@latte:~$ idn --quiet --profile=SASLprep --stringprep teßtª
teßta
jas@latte:~$

jas@latte:~$ idn --debug --quiet ""
system locale uses charset `UTF-8'.

jas@latte:~$

jas@latte:~$ idn --quiet foo
idn: could not convert from ISO-8859-1 to UTF-8.
jas@latte:~$

jas@latte:~$ idn --quiet --debug ""
system locale uses charset `ISO-8859-1'.

jas@latte:~$ CHARSET=UTF-8 idn --quiet --debug räksmörgås
system locale uses charset `UTF-8'.
input[0] = U+0072
input[1] = U+4af3
input[2] = U+006d
input[3] = U+1b29e5
input[4] = U+0073
output[0] = U+0078
output[1] = U+006e
output[2] = U+002d
output[3] = U+002d
output[4] = U+0072
output[5] = U+006d
output[6] = U+0073
output[7] = U+002d
output[8] = U+0068
output[9] = U+0069
output[10] = U+0036
output[11] = U+0064
output[12] = U+0035
output[13] = U+0039
output[14] = U+0037
output[15] = U+0035
output[16] = U+0035
output[17] = U+0032
output[18] = U+0061
xn--rms-hi6d597552a
jas@latte:~$

gnu.inet.encoding.IDNA.toASCII("blöds.züg");
gnu.inet.encoding.IDNA.toUnicode("xn--blds-6qa.xn--zg-xka");

$ java GenerateRFC3454
Creating RFC3454.java... Ok.

$ java GenerateNFKC
Creating CombiningClass.java... Ok.
Creating DecompositionKeys.java... Ok.
Creating DecompositionMappings.java... Ok.
Creating Composition.java... Ok.

$ java -cp .:/usr/share/java/libidn.jar TestIDNA -a <string to test>
Input: <string to test>
Output: <toASCII(string to test)>
$ java -cp .:/usr/share/java/libidn.jar TestIDNA -u <string to test>
Input: <string to test>
Output: <toUnicode(string to test)>

$ java -cp .:/usr/share/java/libidn/libidn.jar TestIDNA -t
No errors detected!

$ java -cp .:/usr/share/java/libidn.jar TestNFKC <string to test>
Input: <string to test>
Output: <nfkc version of the string to test>

$ java -cp .:/usr/share/java/libidn.jar TestNFKC
No errors detected!

From: Rick McGowan <rick@unicode.org>
Subject: Possible bug and status of PR 29 change(s)
To: bug-libidn@gnu.org
Date: Wed, 27 Oct 2004 14:49:17 -0700

Hello. On behalf of the Unicode Consortium editorial committee, I would
like to find out more information about the PR 29 fixes, if any, and
functions in Libidn. Your implementation was listed in the text of PR29 as
needing investigation, so I am following up on several implementations.

The UTC has accepted the proposed fix to D2 as outlined in PR29, and a new
draft of UAX #15 has been issued.

I have looked at Libidn 0.5.8 (today), and there may still be a possible
bug in NFKC.java and nfkc.c.

------------------------------------------------------

1. In NFKC.java, this line in canonicalOrdering():

      if (i > 0 && (last_cc == 0 || last_cc != cc)) {

should perhaps be changed to:

      if (i > 0 && (last_cc == 0 || last_cc < cc)) {

but I'm not sure of the sense of this comparison.

------------------------------------------------------

2. In nfkc.c, function _g_utf8_normalize_wc() has this code:

	  if (i > 0 &&
	      (last_cc == 0 || last_cc != cc) &&
	      combine (wc_buffer[last_start], wc_buffer[i],
		       &wc_buffer[last_start]))
	    {

This appears to have the same bug as the current Python implementation (in
Python 2.3.4). The code should be checking, as per new rule D2 UAX #15
update, that the next combining character is the same or HIGHER than the
current one. It now checks to see if it's non-zero and not equal.

The above line(s) should perhaps be changed to:

	  if (i > 0 &&
	      (last_cc == 0 || last_cc < cc) &&
	      combine (wc_buffer[last_start], wc_buffer[i],
		       &wc_buffer[last_start]))
	    {

but I'm not sure of the sense of the comparison (< or > or <=?) here.

In the text of PR29, I will be marking Libidn as "needs change" and adding
the version number that I checked. If any further change is made, please
let me know the release version, and I'll update again.

Regards,
	Rick McGowan

From: Simon Josefsson <jas@extundo.com>
Subject: Re: Possible bug and status of PR 29 change(s)
To: Rick McGowan <rick@unicode.org>
Cc: bug-libidn@gnu.org
Date: Thu, 28 Oct 2004 09:47:47 +0200

Rick McGowan <rick@unicode.org> writes:

> Hello. On behalf of the Unicode Consortium editorial committee, I would
> like to find out more information about the PR 29 fixes, if any, and
> functions in Libidn. Your implementation was listed in the text of PR29 as
> needing investigation, so I am following up on several implementations.
>
> The UTC has accepted the proposed fix to D2 as outlined in PR29, and a new
> draft of UAX #15 has been issued.
>
> I have looked at Libidn 0.5.8 (today), and there may still be a possible
> bug in NFKC.java and nfkc.c.

Hello Rick.

I believe the current behavior is intentional.  Libidn do not aim to
implement latest-and-greatest NFKC, it aim to implement the NFKC
functionality required for StringPrep and IDN.  As you may know,
StringPrep/IDN reference Unicode 3.2.0, and explicitly says any later
changes (which I consider PR29 as) do not apply.

In fact, I believe that would I incorporate the changes suggested in
PR29, I would in fact be violating the IDN specifications.

Thanks for looking into the code and finding the place where the
change could be made.  I'll see if I can mention this in the manual
somewhere, for technically interested readers.

Regards,
Simon

ToASCII (hi U+248C com) = hi5.com
ToASCII (räksmörgås U+2024 com) = xn--rksmrgs.com-l8as9u

ToASCII (hi U+248C com) = hi5.com
ToASCII (räksmörgås U+2024 com) = xn--rksmrgs-5wao1o.com

Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
http://fsf.org/

Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

  Copyright (C)  year  your name.
  Permission is granted to copy, distribute and/or modify this document
  under the terms of the GNU Free Documentation License, Version 1.3
  or any later version published by the Free Software Foundation;
  with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
  Texts.  A copy of the license is included in the section entitled ``GNU
  Free Documentation License''.

    with the Invariant Sections being list their titles, with
    the Front-Cover Texts being list, and with the Back-Cover Texts
    being list.

Appendices
• Introduction:		How to use this manual.
• Preparation:		What you should do before using the library.
• Utility Functions:		Unicode transformation utility functions.
• Stringprep Functions:		Stringprep functions.
• Punycode Functions:		Punycode functions.
• IDNA Functions:		IDNA functions.
• TLD Functions:		TLD functions.
• PR29 Functions:		Detect strings non-idempotent under NFKC.
• Examples:		Demonstrate how to use the library.
• Invoking idn:		Command line interface to the library.
• Emacs API:		Emacs Lisp API for Libidn.
• Java API:		Notes on the Java port of Libidn.
• C# API:		Notes on the C# port of Libidn.
• Acknowledgements:		Whom to blame.
• History:		Rough outline of development history.
• PR29 discussion:		Implementation aspects of the PR29 flaw.
• On Label Separators:		Discussions of a flaw in the IDNA spec.
• Copying Information:		License texts.
Indices
• Function and Variable Index:
• Concept Index:

• Getting Started:
• Features:
• Library Overview:
• Supported Platforms:
• Getting help:
• Commercial Support:
• Downloading and Installing:
• Bug Reports:
• Contributing:

• Header:
• Initialization:
• Version Check:
• Building the source:
• Autoconf tests:
• Memory handling under Windows:

• Example 1:		Example using stringprep.
• Example 2:		Example using punycode.
• Example 3:		Example using IDNA ToASCII.
• Example 4:		Example using IDNA ToUnicode.
• Example 5:		Example using TLD checking.

	Index Entry	Section

I
	`idna-to-ascii`:	Emacs API
	`idna-to-unicode`:	Emacs API
	`idna_strerror`:	IDNA Functions
	`idna_to_ascii_4i`:	IDNA Functions
	`idna_to_ascii_4z`:	IDNA Functions
	`idna_to_ascii_8z`:	IDNA Functions
	`idna_to_ascii_lz`:	IDNA Functions
	`idna_to_unicode_44i`:	IDNA Functions
	`idna_to_unicode_4z4z`:	IDNA Functions
	`idna_to_unicode_8z4z`:	IDNA Functions
	`idna_to_unicode_8z8z`:	IDNA Functions
	`idna_to_unicode_8zlz`:	IDNA Functions
	`idna_to_unicode_lzlz`:	IDNA Functions
	`idn_free`:	Memory handling under Windows

P
	`pr29_4`:	PR29 Functions
	`pr29_4z`:	PR29 Functions
	`pr29_8z`:	PR29 Functions
	`pr29_strerror`:	PR29 Functions
	`punycode-decode`:	Emacs API
	`punycode-encode`:	Emacs API
	`punycode_decode`:	Punycode Functions
	`punycode_encode`:	Punycode Functions
	`punycode_strerror`:	Punycode Functions

S
	`stringprep`:	Stringprep Functions
	`stringprep_4i`:	Stringprep Functions
	`stringprep_4zi`:	Stringprep Functions
	`stringprep_check_version`:	Version Check
	`stringprep_convert`:	Utility Functions
	`stringprep_iscsi`:	Stringprep Functions
	`stringprep_locale_charset`:	Utility Functions
	`stringprep_locale_to_utf8`:	Utility Functions
	`stringprep_nameprep_no_unassigned`:	Stringprep Functions
	`stringprep_plain`:	Stringprep Functions
	`stringprep_profile`:	Stringprep Functions
	`stringprep_strerror`:	Stringprep Functions
	`stringprep_ucs4_nfkc_normalize`:	Utility Functions
	`stringprep_ucs4_to_utf8`:	Utility Functions
	`stringprep_unichar_to_utf8`:	Utility Functions
	`stringprep_utf8_nfkc_normalize`:	Utility Functions
	`stringprep_utf8_to_locale`:	Utility Functions
	`stringprep_utf8_to_ucs4`:	Utility Functions
	`stringprep_utf8_to_unichar`:	Utility Functions
	`stringprep_xmpp_nodeprep`:	Stringprep Functions
	`stringprep_xmpp_resourceprep`:	Stringprep Functions

T
	`tld_check_4`:	TLD Functions
	`tld_check_4t`:	TLD Functions
	`tld_check_4tz`:	TLD Functions
	`tld_check_4z`:	TLD Functions
	`tld_check_8z`:	TLD Functions
	`tld_check_lz`:	TLD Functions
	`tld_default_table`:	TLD Functions
	`tld_get_4`:	TLD Functions
	`tld_get_4z`:	TLD Functions
	`tld_get_table`:	TLD Functions
	`tld_get_z`:	TLD Functions
	`tld_strerror`:	TLD Functions

	Index Entry	Section

A
	AIX:	Supported Platforms
	ARM:	Supported Platforms
	Autoconf tests:	Autoconf tests

C
	command line:	Invoking idn
	Compiling your application:	Building the source
	Configure tests:	Autoconf tests
	Contributing:	Contributing

D
	de-allocation:	Memory handling under Windows
	Debian:	Supported Platforms
	Debian:	Supported Platforms
	Download:	Downloading and Installing

E
	Examples:	Examples

F
	FDL, GNU Free Documentation License:	GNU Free Documentation License
	free:	Memory handling under Windows
	FreeBSD:	Supported Platforms

H
	Hacking:	Contributing
	heap memory:	Memory handling under Windows
	HP-UX:	Supported Platforms

I
	IBM:	Supported Platforms
	`idn`:	Invoking idn
	IDNA Functions:	IDNA Functions
	Installation:	Downloading and Installing
	invoking `idn`:	Invoking idn
	IRIX:	Supported Platforms

M
	MacOS X:	Supported Platforms
	MacOS X:	Supported Platforms
	Mandrake:	Supported Platforms
	Memory handling:	Memory handling under Windows
	Microsoft:	Supported Platforms
	mingw32:	Supported Platforms
	Motorola Coldfire:	Supported Platforms

N
	NetBSD:	Supported Platforms

O
	OpenBSD:	Supported Platforms
	OpenPower 720:	Supported Platforms
	OS/2:	Supported Platforms

P
	PR29 Functions:	PR29 Functions
	Punycode Functions:	Punycode Functions

R
	RedHat:	Supported Platforms
	RedHat:	Supported Platforms
	RedHat:	Supported Platforms
	RedHat Advanced Server:	Supported Platforms
	Reporting Bugs:	Bug Reports

S
	Solaris:	Supported Platforms
	Solaris:	Supported Platforms
	Solaris:	Supported Platforms
	Stringprep Functions:	Stringprep Functions
	SuSE:	Supported Platforms
	SuSE Linux:	Supported Platforms
	SuSE Linux:	Supported Platforms
	SuSE Linux:	Supported Platforms

T
	TLD Functions:	TLD Functions
	Tru64:	Supported Platforms

U
	uClibc:	Supported Platforms
	uClinux:	Supported Platforms
	Utility Functions:	Utility Functions

W
	Windows:	Supported Platforms
	Windows:	Supported Platforms

GNU Libidn 1.33

Table of Contents

GNU Libidn

1 Introduction

1.1 Getting Started

1.2 Features

1.3 Library Overview

1.4 Supported Platforms

1.5 Getting help

1.6 Commercial Support

1.7 Downloading and Installing

1.7.1 Installing under Windows

1.8 Bug Reports

1.9 Contributing

2 Preparation

2.1 Header

2.2 Initialization

2.3 Version Check

stringprep_check_version

2.4 Building the source

2.5 Autoconf tests

2.6 Memory handling under Windows

2.7 Header file idn-free.h

2.8 Memory de-allocation function

idn_free

3 Utility Functions

3.1 Header file stringprep.h

3.2 Unicode Encoding Transformation

stringprep_unichar_to_utf8

stringprep_utf8_to_unichar

stringprep_ucs4_to_utf8

stringprep_utf8_to_ucs4

3.3 Unicode Normalization

stringprep_ucs4_nfkc_normalize

stringprep_utf8_nfkc_normalize

3.4 Character Set Conversion

stringprep_locale_charset

stringprep_convert

stringprep_locale_to_utf8

stringprep_utf8_to_locale

4 Stringprep Functions

4.1 Header file stringprep.h

4.2 Defining A Stringprep Profile

4.3 Control Flags

4.4 Core Functions

stringprep_4i

stringprep_4zi

stringprep

stringprep_profile

4.5 Error Handling

stringprep_strerror

4.6 Stringprep Profile Macros

5 Punycode Functions

5.1 Header file punycode.h

5.2 Unicode Code Point Data Type

5.3 Core Functions

punycode_encode

punycode_decode

5.4 Error Handling

punycode_strerror

6 IDNA Functions

6.1 Header file idna.h

6.2 Control Flags

6.3 Prefix String

6.4 Core Functions

idna_to_ascii_4i

idna_to_unicode_44i

6.5 Simplified ToASCII Interface

idna_to_ascii_4z

idna_to_ascii_8z

idna_to_ascii_lz

6.6 Simplified ToUnicode Interface

idna_to_unicode_4z4z

idna_to_unicode_8z4z

idna_to_unicode_8z8z

idna_to_unicode_8zlz

idna_to_unicode_lzlz

6.7 Error Handling

idna_strerror

7 TLD Functions

2.7 Header file `idn-free.h`

3.1 Header file `stringprep.h`

4.1 Header file `stringprep.h`

5.1 Header file `punycode.h`

6.1 Header file `idna.h`

7.1 Header file `tld.h`

8.1 Header file `pr29.h`