00001
00034
00035
00036
00037
00038
00039
00040
00041 #include <sent/stddefs.h>
00042
00043 #ifdef CHARACTER_CONVERSION
00044
00045 #ifdef USE_WIN32_MULTIBYTE
00046
00047 #include <windows.h>
00048 #include <winnls.h>
00049 #include <jlib.h>
00050
00051 static boolean euctosjis = FALSE;
00052 static boolean only_euc_conv = FALSE;
00053
00054 static unsigned int from_cp;
00055 static unsigned int to_cp;
00056
00066 boolean
00067 charconv_win32_setup(char *fromcode, char *tocode, boolean *enable_conv)
00068 {
00069 unsigned int src_p, dst_p;
00070
00071 if (tocode == NULL) {
00072
00073 *enable_conv = FALSE;
00074 } else {
00075
00076 if (fromcode == NULL) {
00077 j_printerr("Error: charset names of both input and output should be given.\n");
00078 j_printerr("Error: use \"-charconv from to\" instead of \"-kanji\".\n");
00079 *enable_conv = FALSE;
00080 return FALSE;
00081 }
00082 euctosjis = FALSE;
00083 if (strmatch(fromcode, "euc-jp")
00084 || strmatch(fromcode, "euc")
00085 || strmatch(fromcode, "eucjp")) {
00086
00087 euctosjis = TRUE;
00088
00089 from_cp = 932;
00090 } else if (strmatch(fromcode, "ansi")) {
00091
00092 from_cp = CP_ACP;
00093 } else if (strmatch(fromcode, "mac")) {
00094
00095 from_cp = CP_MACCP;
00096 } else if (strmatch(fromcode, "oem")) {
00097
00098 from_cp = CP_OEMCP;
00099 } else if (strmatch(fromcode, "utf-7")) {
00100
00101 from_cp = CP_UTF7;
00102 } else if (strmatch(fromcode, "utf-8")) {
00103
00104 from_cp = CP_UTF8;
00105 } else if (strmatch(fromcode, "sjis")
00106 || strmatch(fromcode, "sjis-win")
00107 || strmatch(fromcode, "shift-jis")
00108 || strmatch(fromcode, "shift_jis")) {
00109
00110 from_cp = 932;
00111 } else if (fromcode[0] >= '0' && fromcode[0] <= '9') {
00112
00113 from_cp = atoi(fromcode);
00114 if (! IsValidCodePage(from_cp)) {
00115 j_printerr("Error: codepage #%d not found\n", from_cp);
00116 *enable_conv = FALSE;
00117 return FALSE;
00118 }
00119 } else {
00120 j_printerr("Error: unknown source codepage \"%s\"\n", fromcode);
00121 j_printerr("Error: valids are \"ansi\", \"mac\", \"oem\", \"utf-7\", \"utf-8\" and codepage number\n");
00122 j_printerr("Error: the default local charcode can be speicified by \"ansi\".\n");
00123 *enable_conv = FALSE;
00124 return FALSE;
00125 }
00126
00127 if (strmatch(tocode, "ansi")) {
00128
00129 to_cp = CP_ACP;
00130 } else if (strmatch(tocode, "mac")) {
00131
00132 to_cp = CP_MACCP;
00133 } else if (strmatch(tocode, "oem")) {
00134
00135 to_cp = CP_OEMCP;
00136 } else if (strmatch(tocode, "utf-7")) {
00137
00138 to_cp = CP_UTF7;
00139 } else if (strmatch(tocode, "utf-8")) {
00140
00141 to_cp = CP_UTF8;
00142 } else if (strmatch(tocode, "sjis")
00143 || strmatch(tocode, "sjis-win")
00144 || strmatch(tocode, "shift-jis")
00145 || strmatch(tocode, "shift_jis")) {
00146
00147 to_cp = 932;
00148 } else if (tocode[0] >= '0' && tocode[0] <= '9') {
00149
00150 to_cp = atoi(tocode);
00151 if (! IsValidCodePage(to_cp)) {
00152 j_printerr("Error: codepage #%d not found\n", to_cp);
00153 *enable_conv = FALSE;
00154 return FALSE;
00155 }
00156 } else {
00157 j_printerr("Error: unknown target codepage \"%s\"\n", tocode);
00158 j_printerr("Error: valids are \"ansi\", \"mac\", \"oem\", \"utf-7\", \"utf-8\" and codepage number\n");
00159 j_printerr("Error: the default local charcode can be speicified by \"ansi\".\n");
00160 *enable_conv = FALSE;
00161 return FALSE;
00162 }
00163
00164
00165 src_p = from_cp;
00166 dst_p = to_cp;
00167 if (src_p == CP_ACP) src_p = GetACP();
00168 if (dst_p == CP_ACP) dst_p = GetACP();
00169 if (src_p == CP_OEMCP) src_p = GetOEMCP();
00170 if (dst_p == CP_OEMCP) dst_p = GetOEMCP();
00171
00172 if (src_p == dst_p) {
00173 if (euctosjis == FALSE) {
00174 only_euc_conv = FALSE;
00175 *enable_conv = FALSE;
00176 } else {
00177 only_euc_conv = TRUE;
00178 *enable_conv = TRUE;
00179 }
00180 } else {
00181 only_euc_conv = FALSE;
00182 *enable_conv = TRUE;
00183 }
00184 }
00185
00186 return TRUE;
00187 }
00188
00189 #define UNICODE_BUFFER_SIZE 4096
00190 static wchar_t unibuf[UNICODE_BUFFER_SIZE];
00191
00192
00202 char *
00203 charconv_win32(char *instr, char *outstr, int maxoutlen)
00204 {
00205 int unilen, newlen;
00206 char *srcbuf;
00207
00208 srcbuf = instr;
00209 if (euctosjis == TRUE) {
00210
00211 toStringSJIS(instr, outstr, maxoutlen);
00212 srcbuf = outstr;
00213 if (only_euc_conv) {
00214 return(outstr);
00215 }
00216 }
00217
00218
00219 unilen = MultiByteToWideChar(from_cp, 0, srcbuf, -1, NULL, 0);
00220 if (unilen <= 0) {
00221 j_printerr("conversion error?\n");
00222 return(instr);
00223 }
00224 if (unilen > UNICODE_BUFFER_SIZE) {
00225 j_printerr("InternalError: unicode buffer size exceeded (%d > %d)!\n", unilen, UNICODE_BUFFER_SIZE);
00226 return(instr);
00227 }
00228
00229 MultiByteToWideChar(from_cp, 0, srcbuf, -1, unibuf, unilen);
00230
00231 newlen = WideCharToMultiByte(to_cp, 0, unibuf, -1, outstr, 0, NULL, NULL);
00232 if (newlen <= 0) {
00233 j_printerr("conversion error?\n");
00234 return(instr);
00235 }
00236 if (newlen > maxoutlen) {
00237 j_printerr("InternalError: target buffer size exceeded (%d > %d)!\n", newlen, maxoutlen);
00238 return(instr);
00239 }
00240
00241 WideCharToMultiByte(to_cp, 0, unibuf, -1, outstr, newlen, NULL, NULL);
00242
00243 return(outstr);
00244 }
00245
00246 #endif
00247
00248 #endif