libsent/src/util/charconv_iconv.c

Go to the documentation of this file.
00001 
00019 /*
00020  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00021  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00022  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00023  * All rights reserved
00024  */
00025 
00026 #include <sent/stddefs.h>
00027 
00028 #ifdef CHARACTER_CONVERSION
00029 #ifdef HAVE_ICONV
00030 
00031 #include <iconv.h>
00032 static iconv_t cd = (iconv_t)-1; /* conversion descriptor shared by the functions in this file; (iconv_t)-1 means no descriptor is currently open */
00033 
00043 boolean
00044 charconv_iconv_setup(char *fromcode, char *tocode, boolean *enable_conv)
00045 {
00046   /* clear already allocated descriptor */
00047   if (cd != (iconv_t)-1) {
00048     if (iconv_close(cd) < 0) {
00049       perror("j_prinf_set_iconv");
00050       return FALSE;
00051     }
00052     cd = (iconv_t)-1;
00053   }
00054   
00055   if (tocode == NULL) {
00056     /* disable conversion */
00057     *enable_conv = FALSE;
00058   } else {
00059     /* check for codes */
00060     if (fromcode == NULL) {
00061       j_printerr("Error: charset names of both input and output should be given.\n");
00062       j_printerr("Error: use \"-charconv from to\" instead of \"-kanji\".\n");
00063       *enable_conv = FALSE;
00064       return FALSE;
00065     }      
00066     /* allocate conversion descriptor */
00067     cd = iconv_open(tocode, fromcode);
00068     if (cd == (iconv_t)-1) {
00069       /* allocation failed */
00070       j_printerr("Error: unknown charset name in \"%s\" or \"%s\"\n", fromcode, tocode);
00071       j_printerr("Error: do \"iconv --list\" to get the list of available charset names.\n");
00072       *enable_conv = FALSE;
00073       return FALSE;
00074     }
00075     *enable_conv = TRUE;
00076   }
00077   return TRUE;
00078 }
00079 
00090 char *
00091 charconv_iconv(char *instr, char *outstr, int maxoutlen)
00092 {
00093   char *src, *dst;
00094   size_t srclen, dstlen;
00095   size_t ret;
00096 
00097   if (cd == (iconv_t)-1) {
00098     j_error("InternalError: codeconv: conversion descriptor not allocated\n");
00099   }
00100   srclen = strlen(instr)+1;
00101   dstlen = maxoutlen;
00102   src = instr;
00103   dst = outstr;
00104   ret = iconv(cd, &src, &srclen, &dst, &dstlen);
00105   if (ret == -1) {
00106     switch(errno) {
00107     case EILSEQ:
00108       j_error("InternalError: codeconv: invalid multibyte sequence in the input\n");
00109       break;
00110     case EINVAL:
00111       j_error("InternalError: codeconv: incomplete multibyte sequence in the input\n");
00112       break;
00113     case E2BIG:
00114       j_error("InternalError: codeconv: converted string size exceeded buffer (>%d)\n", maxoutlen);
00115       break;
00116     }
00117   }
00118 
00119   /* outstr always holds the result */
00120   return(outstr);
00121 }
00122 
00123 #endif /* HAVE_ICONV */
00124 #endif /* CHARACTER_CONVERSION */

Generated on Tue Dec 26 16:16:33 2006 for Julius by  doxygen 1.5.0