/* Charset conversion.
   Copyright (C) 2001-2006 Free Software Foundation, Inc.
   Written by Bruno Haible and Simon Josefsson.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#include <config.h>

/* Specification.  */
#include "striconv.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#if HAVE_ICONV
# include <iconv.h>
/* Get MB_LEN_MAX, CHAR_BIT.  */
# include <limits.h>
#endif

#include "strdup.h"
#include "c-strcase.h"

#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif


#if HAVE_ICONV

/* Convert the SRCLEN bytes starting at SRC through the conversion
   descriptor CD.  The work is done in two passes: a dry run through a
   scratch buffer that only measures the output, followed by the real
   conversion into a block of exactly the measured size.

   *RESULTP must initially be NULL or a block that may be handed to
   realloc().

   Return 0 on success.  If the output is non-empty, *RESULTP then points to
   the (re)allocated output and *LENGTHP holds its size in bytes.  If the
   output is empty, only *LENGTHP is stored (as 0) and *RESULTP is left
   untouched.

   Return -1 with errno set on failure.  If the failure occurs after the
   allocation step, *RESULTP and *LENGTHP already designate the new block.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
	      char **resultp, size_t *lengthp)
{
  /* The reset and flush calls (iconv with a NULL input) are left out where
     they would hit the glibc-2.1 bug or the Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
#  define MEM_CD_ICONV_FLUSH_OK 1
# else
#  define MEM_CD_ICONV_FLUSH_OK 0
# endif
  /* Irix iconv() inserts a NUL byte if it cannot convert.
     NetBSD iconv() inserts a question mark if it cannot convert.
     Only GNU libiconv and GNU libc are known to prefer to fail rather
     than doing a lossy conversion.  Everywhere else a positive return
     value from iconv() is turned into an EILSEQ failure.  */
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
#  define MEM_CD_ICONV_LOSSY 1
# else
#  define MEM_CD_ICONV_LOSSY 0
# endif
  enum { scratch_size = 4096 };
  size_t needed = 0;
  char *buffer;

# if MEM_CD_ICONV_FLUSH_OK
  /* Put CD into its initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Pass 1: measure the output by converting into a throw-away buffer.  */
  {
    char scratch[scratch_size];
    const char *in = src;
    size_t inleft = srclen;

    while (inleft > 0)
      {
	char *out = scratch;
	size_t outleft = scratch_size;
	size_t rc = iconv (cd,
			   (ICONV_CONST char **) &in, &inleft,
			   &out, &outleft);

	if (rc == (size_t)(-1))
	  {
	    /* EINVAL ends the loop without being reported; E2BIG merely
	       means the scratch buffer filled up, so keep going.  */
	    if (errno == EINVAL)
	      break;
	    if (errno != E2BIG)
	      return -1;
	  }
# if MEM_CD_ICONV_LOSSY
	else if (rc > 0)
	  {
	    errno = EILSEQ;
	    return -1;
	  }
# endif
	needed += out - scratch;
      }
# if MEM_CD_ICONV_FLUSH_OK
    /* Account for whatever CD emits when it is flushed.  */
    {
      char *out = scratch;
      size_t outleft = scratch_size;

      if (iconv (cd, NULL, NULL, &out, &outleft) == (size_t)(-1))
	return -1;
      needed += out - scratch;
    }
# endif
  }

  /* Empty output: nothing to allocate.  */
  if (needed == 0)
    {
      *lengthp = 0;
      return 0;
    }

  if (*resultp == NULL)
    buffer = malloc (needed);
  else
    buffer = realloc (*resultp, needed);
  if (buffer == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  *resultp = buffer;
  *lengthp = needed;

# if MEM_CD_ICONV_FLUSH_OK
  /* Bring CD back to its initial state before converting again.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Pass 2: the real conversion, straight into the exactly-sized block.  */
  {
    const char *in = src;
    size_t inleft = srclen;
    char *out = buffer;
    size_t outleft = needed;

    while (inleft > 0)
      {
	size_t rc = iconv (cd,
			   (ICONV_CONST char **) &in, &inleft,
			   &out, &outleft);

	if (rc == (size_t)(-1))
	  {
	    if (errno != EINVAL)
	      return -1;
	    break;
	  }
# if MEM_CD_ICONV_LOSSY
	else if (rc > 0)
	  {
	    errno = EILSEQ;
	    return -1;
	  }
# endif
      }
# if MEM_CD_ICONV_FLUSH_OK
    if (iconv (cd, NULL, NULL, &out, &outleft) == (size_t)(-1))
      return -1;
# endif
    /* Both passes must agree on the output size.  */
    if (outleft != 0)
      abort ();
  }

  return 0;
# undef MEM_CD_ICONV_LOSSY
# undef MEM_CD_ICONV_FLUSH_OK
}

/* Convert the NUL-terminated string SRC through the conversion descriptor
   CD.  Return a freshly allocated, NUL-terminated result that the caller
   must free(), or NULL with errno set on failure.  */
char *
str_cd_iconv (const char *src, iconv_t cd)
{
  /* For most encodings, a trailing NUL byte in the input will be converted
     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
     function is usable for UTF-7, we have to exclude the NUL byte from the
     conversion and add it by hand afterwards.  */
# if PROBABLY_SLOWER || (!defined _LIBICONV_VERSION && !defined __GLIBC__)
  /* Irix iconv() inserts a NUL byte if it cannot convert.
     NetBSD iconv() inserts a question mark if it cannot convert.
     Only GNU libiconv and GNU libc are known to prefer to fail rather
     than doing a lossy conversion.  For other iconv() implementations,
     we have to look at the number of irreversible conversions returned;
     but this information is lost when iconv() returns -1 for an E2BIG
     reason.  The grow-on-E2BIG algorithm below could therefore let a lossy
     conversion through, so on those implementations we go through
     mem_cd_iconv, whose final iconv() call converts the whole input at
     once.  */

  char *result = NULL;
  size_t length = 0;
  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
  char *final_result;

  if (retval < 0)
    {
      /* mem_cd_iconv may already have stored an allocated block in RESULT;
	 release it while preserving the errno of the failure.  */
      int saved_errno = errno;
      free (result);
      errno = saved_errno;
      return NULL;
    }

  /* Add the terminating NUL byte.  RESULT is still NULL when the converted
     output is empty.  */
  final_result =
    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
  if (final_result == NULL)
    {
      free (result);
      errno = ENOMEM;
      return NULL;
    }
  final_result[length] = '\0';

  return final_result;

# else
  /* This algorithm is only used with GNU libiconv and GNU libc, which fail
     rather than doing a lossy conversion, so the count of irreversible
     conversions returned by iconv() does not need to be examined.  */

  char *result;
  size_t result_size;
  size_t length;
  const char *inptr = src;
  size_t inbytes_remaining = strlen (src);

  /* Make a guess for the worst-case output size, in order to avoid a
     realloc.  It's OK if the guess is wrong as long as it is not zero and
     doesn't lead to an integer overflow.  */
  result_size = inbytes_remaining;
  {
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
      result_size *= MB_LEN_MAX;
  }
  result_size += 1; /* for the terminating NUL */

  result = (char *) malloc (result_size);
  if (result == NULL)
    {
      errno = ENOMEM;
      return NULL;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion.  */
  {
    char *outptr = result;
    size_t outbytes_remaining = result_size - 1;

    for (;;)
      {
	/* Here inptr + inbytes_remaining = src + strlen (src),
		outptr + outbytes_remaining = result + result_size - 1.  */
	size_t res = iconv (cd,
			    (ICONV_CONST char **) &inptr, &inbytes_remaining,
			    &outptr, &outbytes_remaining);

	if (res == (size_t)(-1))
	  {
	    if (errno == EINVAL)
	      break;
	    else if (errno == E2BIG)
	      {
		/* Output buffer full: double it and retry from where the
		   conversion stopped.  */
		size_t used = outptr - result;
		size_t newsize = result_size * 2;
		char *newresult;

		/* Detect wrap-around of the doubled size.  */
		if (!(newsize > result_size))
		  {
		    errno = ENOMEM;
		    goto failed;
		  }
		newresult = (char *) realloc (result, newsize);
		if (newresult == NULL)
		  {
		    errno = ENOMEM;
		    goto failed;
		  }
		result = newresult;
		result_size = newsize;
		outptr = result + used;
		outbytes_remaining = result_size - 1 - used;
	      }
	    else
	      goto failed;
	  }
	else
	  break;
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    /* Flush CD, growing the buffer the same way if the flushed bytes do
       not fit.  */
    for (;;)
      {
	/* Here outptr + outbytes_remaining = result + result_size - 1.  */
	size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);

	if (res == (size_t)(-1))
	  {
	    if (errno == E2BIG)
	      {
		size_t used = outptr - result;
		size_t newsize = result_size * 2;
		char *newresult;

		if (!(newsize > result_size))
		  {
		    errno = ENOMEM;
		    goto failed;
		  }
		newresult = (char *) realloc (result, newsize);
		if (newresult == NULL)
		  {
		    errno = ENOMEM;
		    goto failed;
		  }
		result = newresult;
		result_size = newsize;
		outptr = result + used;
		outbytes_remaining = result_size - 1 - used;
	      }
	    else
	      goto failed;
	  }
	else
	  break;
      }
# endif

    /* Add the terminating NUL byte.  One byte was kept in reserve for it
       throughout (outbytes_remaining always excludes it).  */
    *outptr++ = '\0';

    length = outptr - result;
  }

  /* Give away unused memory.  If the shrinking realloc fails, the original
     block is still valid and is returned as is.  */
  if (length < result_size)
    {
      char *smaller_result = (char *) realloc (result, length);

      if (smaller_result != NULL)
	result = smaller_result;
    }

  return result;

 failed:
  {
    /* Release the buffer while preserving the errno of the failure.  */
    int saved_errno = errno;
    free (result);
    errno = saved_errno;
    return NULL;
  }

# endif
}

#endif

/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.  Return a freshly allocated, NUL-terminated
   result that the caller must free(), or NULL with errno set on failure.
   errno is ENOSYS when this file was built without iconv support and the
   two codeset names differ.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      /* Same encoding on both sides: a plain copy is enough.  */
      char *result = strdup (src);

      /* Set errno explicitly, like every other allocation failure in this
	 file, rather than relying on strdup to have done so.  */
      if (result == NULL)
	errno = ENOMEM;
      return result;
    }
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
	  || c_strcasecmp (to_codeset, "EUC-KR") == 0)
	{
	  errno = EINVAL;
	  return NULL;
	}
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
	return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
	{
	  /* Close cd, but preserve the errno from str_cd_iconv.  */
	  int saved_errno = errno;
	  iconv_close (cd);
	  errno = saved_errno;
	}
      else
	{
	  if (iconv_close (cd) < 0)
	    {
	      /* Return NULL, but free the allocated memory, and while doing
		 that, preserve the errno from iconv_close.  */
	      int saved_errno = errno;
	      free (result);
	      errno = saved_errno;
	      return NULL;
	    }
	}
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
	 support from_codeset and to_codeset, so that the caller can emit
	 an error message such as
	   "iconv() is not supported. Installing GNU libiconv and
	    then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}