root/CodeConvert.cpp
/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes the following definitions.
- CCodeConvert
- CCodeConvert
- guess
- sjis_to_jis
- jis_to_sjis
- getc
- putc
- JisToSjis
- SjisToJis
- EucToSjis
- ToJis
- ToSjis
/*
* Copyright (C) 2002-2003 chik, s.hiranaka
* For license terms, see the file COPYING in this directory.
*/
// CodeConvert.cpp: implementation of the CCodeConvert class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "Pochy.h"
#include "CodeConvert.h"
//#include "iconv.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
#define JIS 1
#define SJIS 2
#define EUC 3
// Binds the converter to Src and allocates the output buffer.
//
// Src: the text to convert. Only the pointer returned by Src.GetBuffer(0) is
//      stored; the characters themselves are not copied here.
//      NOTE(review): this presumably requires Src to outlive the converter
//      and not be modified while it is in use - confirm against callers.
//
// Buffer sizing: SjisToJis() is the conversion that grows its input the most.
// A single half-width kana byte or ASCII byte that forces a mode change is
// written as a 3-byte escape sequence plus one byte, i.e. 4 output bytes for
// 1 input byte. At the end of the text up to 3 more bytes (closing escape)
// and the terminating NUL are written. The buffer therefore has to hold
// 4 * length + 4 bytes; the previous factor of 2 could be overrun by long
// inputs that switch modes frequently.
CCodeConvert::CCodeConvert(CString &Src)
{
    int length;

    m_pSrc = Src.GetBuffer(0);
    m_in_counter = 0;
    m_out_counter = 0;

    // Worst-case output size of any conversion in this class (see above).
    length = Src.GetLength() * 4 + 4;
    // Never allocate less than 64 KB.
    if(length < 64*1024) length = 64*1024;
    m_buf = new char [length];
}
// Releases the output buffer that the constructor allocated.
CCodeConvert::~CCodeConvert()
{
    // delete [] on a null pointer does nothing, so no explicit check is needed.
    delete [] m_buf;
}
// Heuristically determines the encoding of m_pSrc.
//
// Returns one of the file-local constants JIS, SJIS or EUC.
//
// Pass 1 looks for one of the escape sequences ESC $ @, ESC $ B, ESC ( B,
// ESC ( J or ESC ( I and returns JIS as soon as one is found.
// Pass 2 and pass 3 read the same bytes as EUC and as Shift_JIS respectively
// and add up a penalty score for each reading. The reading with the smaller
// penalty is returned; a tie is reported as SJIS.
//
// At most the first 8*1024 bytes are examined, and every pass stops two bytes
// before the end of the string.
//
// NOTE(review): iseuc, ishankana, issjis1 and issjis2 are not defined in this
// file; presumably they are byte-range macros from CodeConvert.h - confirm.
int CCodeConvert::guess()
{
int i, max, bad_euc, bad_sjis;
// Stopping at length-2 keeps the m_pSrc[i+1] / m_pSrc[i+2] look-ahead of
// pass 1 inside the string. For strings shorter than 3 characters max is <= 0,
// none of the loops run, and SJIS is returned (both penalties stay 0).
max = lstrlen(m_pSrc)-2;
if(max > 8*1024) max = 8*1024;
// Pass 1: any recognised escape sequence means JIS.
for(i = 0; i < max; i++){
if((unsigned char)m_pSrc[i] == 0x1b){
if((unsigned char)m_pSrc[i+1] == '$'){
if((unsigned char)m_pSrc[i+2] == '@' || (unsigned char)m_pSrc[i+2] =='B'){
return JIS;
}
}
else if((unsigned char)m_pSrc[i+1] == '('){
if((unsigned char)m_pSrc[i+2] == 'B' || (unsigned char)m_pSrc[i+2] == 'J' || (unsigned char)m_pSrc[i+2] == 'I'){
return JIS;
}
}
}
}
// Pass 2: penalty for reading the text as EUC.
// Note that in the "x && ++i < max" conditions the increment has already
// happened when the "< max" test fails, so the following else-if branches
// then look at the next byte.
bad_euc = 0;
for(i = 0; i < max; i++){
if(iseuc((unsigned char)m_pSrc[i]) && ++i < max){
if(!iseuc((unsigned char)m_pSrc[i])){
// First byte accepted by iseuc, second byte not: heavy penalty. Step back so
// that the loop increment lands on this second byte again and it is
// examined as a possible first byte.
bad_euc += 10;
i--;
}
// Well-formed pair whose first byte is >= 0xd0: small penalty.
// NOTE(review): 0xd0 presumably marks the start of the less common kanji
// range - confirm.
else if((unsigned char)m_pSrc[i-1] >= 0xd0)
bad_euc++;
}
// 0x8e prefix: small penalty when followed by a byte accepted by ishankana,
// heavy penalty (and re-examination of that byte) otherwise.
else if((unsigned char)m_pSrc[i] == 0x8e && ++i < max){
if(ishankana((unsigned char)m_pSrc[i]))
bad_euc++;
else{
bad_euc += 10;
i--;
}
}
// Any other byte with the high bit set: heavy penalty.
else if((unsigned char)m_pSrc[i] >= 0x80)
bad_euc += 10;
}
// Pass 3: penalty for reading the text as Shift_JIS, same scheme as pass 2.
bad_sjis = 0;
for(i = 0; i < max; i++){
if(issjis1((unsigned char)m_pSrc[i]) && ++i < max){
if(! issjis2((unsigned char)m_pSrc[i])){
// Lead byte followed by a byte rejected by issjis2: heavy penalty, and the
// second byte is examined again on the next iteration.
bad_sjis += 10;
i--;
}
// Well-formed pair whose 16-bit value is >= 0x989f: small penalty.
// NOTE(review): 0x989f presumably marks the start of the less common kanji
// range in Shift_JIS - confirm.
else if((unsigned) ((unsigned char)m_pSrc[i-1] * 256U + (unsigned char)m_pSrc[i]) >= 0x989f)
bad_sjis++;
}
// Remaining bytes with the high bit set: small penalty when accepted by
// ishankana, heavy penalty otherwise.
else if ((unsigned char)m_pSrc[i] >= 0x80){
if(ishankana((unsigned char)m_pSrc[i])) bad_sjis++;
else bad_sjis += 10;
}
}
// Lower penalty wins; on a tie SJIS is chosen.
if(bad_sjis <= bad_euc) return SJIS;
else return EUC;
}
// Converts one two-byte Shift_JIS code to the corresponding JIS code in place.
//
// ph: in: Shift_JIS first byte,  out: JIS first byte.
// pl: in: Shift_JIS second byte, out: JIS second byte.
//
// Each Shift_JIS first byte covers two JIS rows; second bytes below 0x9f
// select the first row of the pair, all others the second row.
void CCodeConvert::sjis_to_jis(int *ph, int *pl)
{
    const int lead = *ph;

    // Amount subtracted from the doubled first byte: 0xe0 / 0x160 depending on
    // which first-byte range is used, one more for the first row of the pair.
    int bias = (lead <= 0x9f) ? 0xe0 : 0x160;
    if (*pl < 0x9f) {
        bias += 1;
    }
    *ph = (lead << 1) - bias;

    // The second byte is shifted by an amount that depends on its sub-range.
    const int trail = *pl;
    int shift;
    if (trail >= 0x9f) {
        shift = 0x7e;
    } else if (trail >= 0x7f) {
        shift = 0x20;
    } else {
        shift = 0x1f;
    }
    *pl = trail - shift;
}
// Converts one two-byte JIS code to the corresponding Shift_JIS code in place.
//
// ph: in: JIS first byte,  out: Shift_JIS first byte.
// pl: in: JIS second byte, out: Shift_JIS second byte.
void CCodeConvert::jis_to_sjis(int *ph, int *pl)
{
    // Second byte: the offset depends on whether the first byte is odd or even
    // and, for odd first bytes, on whether the second byte is below 0x60.
    int trailOffset = 0x7e;
    if ((*ph & 1) != 0) {
        trailOffset = (*pl < 0x60) ? 0x1f : 0x20;
    }
    *pl += trailOffset;

    // First byte: two JIS rows share one Shift_JIS first byte, hence the
    // halving; the bias selects the output range.
    const int leadBias = (*ph < 0x5f) ? 0xe1 : 0x161;
    *ph = (*ph + leadBias) >> 1;
}
// Reads the next input byte from m_pSrc.
//
// Returns the byte as a value in 0..255 and advances m_in_counter, or returns
// EOF without advancing when the terminating NUL has been reached.
int CCodeConvert::getc()
{
    if (m_pSrc[m_in_counter] != '\0') {
        return (int)(unsigned char)m_pSrc[m_in_counter++];
    }
    return EOF;
}
// Appends one byte to m_buf and advances m_out_counter.
//
// c: byte to store; EOF is stored as the terminating NUL.
//
// Returns EOF when the stored byte is NUL, TRUE otherwise.
// No bounds check is performed on m_buf.
int CCodeConvert::putc(int c)
{
    const char stored = (c == EOF) ? '\0' : (char)c;

    m_buf[m_out_counter++] = stored;
    return (stored == '\0') ? EOF : TRUE;
}
// Converts m_pSrc from JIS (escape-sequence based) to Shift_JIS.
//
// The input is read with getc() and the result is written to m_buf with
// putc(). A small state machine tracks the current character set:
//   ESC $ @ / ESC $ B   -> two-byte kanji
//   ESC ( B / ESC ( J   -> normal single-byte text
//   ESC ( I or 0x0e     -> half-width kana
//   0x0f                -> normal single-byte text
// Escape sequences and the two shift codes produce no output. In normal mode
// only tab, LF, CR, form feed and bytes from ' ' up to 0x7e are copied; every
// other byte is dropped.
//
// Returns m_buf (NUL-terminated). Both counters are reset before returning.
//
// NOTE(review): isjis is not defined in this file; presumably a byte-range
// macro from CodeConvert.h - confirm.
// The disabled iconv-based variants that used to follow the return statement
// as comments have been removed; this state machine is the only implementation.
LPCTSTR CCodeConvert::JisToSjis()
{
    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;
    int c = 0;
    int d;
    bool reprocess = false;     // true: handle the current c again, do not read

    for (;;) {
        if (!reprocess) {
            c = getc();
            if (c == EOF) break;
        }
        reprocess = false;

        // Control codes that only change the mode.
        switch (c) {
        case 0x1b:
            c = getc();
            if (c == '$') {
                c = getc();
                if (c == '@' || c == 'B') {
                    mode = KANJI;
                }
            } else if (c == '(') {
                c = getc();
                if (c == 'B' || c == 'J') {
                    mode = NORMAL;
                } else if (c == 'I') {
                    mode = HANKANA;
                }
            }
            continue;
        case 0x0e:
            mode = HANKANA;
            continue;
        case 0x0f:
            mode = NORMAL;
            continue;
        default:
            break;
        }

        // Two-byte character: needs a valid second byte, otherwise the byte
        // that was read in its place is processed on its own.
        if (mode == KANJI && isjis(c)) {
            d = getc();
            if (isjis(d)) {
                jis_to_sjis(&c, &d);
                putc(c);
                putc(d);
            } else {
                c = d;
                reprocess = true;
            }
            continue;
        }

        // Half-width kana: 7-bit code with the high bit added. A byte outside
        // 0x20..0x5f ends kana mode and is handled again as normal text.
        if (mode == HANKANA) {
            if (c >= 0x20 && c <= 0x5f) {
                putc(c | 0x80);
            } else {
                mode = NORMAL;
                reprocess = true;
            }
            continue;
        }

        // Plain text: copy printable ASCII and a few whitespace controls.
        if (c == '\t' || c == '\n' || c == '\r' ||
            c == 12 || (c >= ' ' && c < 0x7f))
        {
            putc(c);
        }
    }

    putc(EOF);
    m_in_counter = 0;
    m_out_counter = 0;
    return (LPCTSTR)m_buf;
}
// Converts m_pSrc from Shift_JIS to JIS (escape-sequence based).
//
// The input is read with getc() and the result is written to m_buf with
// putc(). Whenever the kind of character changes, the matching escape
// sequence is written first:
//   ESC $ B  before two-byte kanji
//   ESC ( I  before half-width kana (written as 7-bit codes)
//   ESC ( B  before normal single-byte text, and at the end of the output
//            if the text did not end in normal mode
// In normal mode only tab, LF, CR, form feed and bytes from ' ' up to 0x7e
// are copied; every other byte is dropped.
//
// Returns m_buf (NUL-terminated). Both counters are reset before returning.
//
// NOTE(review): issjis1, issjis2 and ishankana are not defined in this file;
// presumably byte-range macros from CodeConvert.h - confirm.
LPCTSTR CCodeConvert::SjisToJis(void)
{
    enum {NORMAL, KANJI, HANKANA} mode = NORMAL;
    int c = 0;
    int d;
    bool reprocess = false;     // true: handle the current c again, do not read

    for (;;) {
        if (!reprocess) {
            c = getc();
            if (c == EOF) break;
        }
        reprocess = false;

        if (issjis1(c)) {
            // Lead byte: convert only when a valid second byte follows,
            // otherwise drop the lead byte and process the byte read instead.
            d = getc();
            if (issjis2(d)) {
                sjis_to_jis(&c, &d);
                if (mode != KANJI) {
                    mode = KANJI;
                    putc(0x1b);
                    putc('$');
                    putc('B');
                }
                putc(c);
                putc(d);
            } else {
                c = d;
                reprocess = true;
            }
            continue;
        }

        if (ishankana(c)) {
            if (mode != HANKANA) {
                mode = HANKANA;
                putc(0x1b);
                putc('(');
                putc('I');
            }
            putc(c & 0x7f);
            continue;
        }

        // Everything else is treated as normal single-byte text.
        if (mode != NORMAL) {
            putc(0x1b);
            putc('(');
            putc('B');
            mode = NORMAL;
        }
        if (c == '\t' || c == '\n' || c == '\r' ||
            c == 12 || (c >= ' ' && c < 0x7f))
        {
            putc(c);
        }
    }

    // Leave the output in normal mode.
    if (mode != NORMAL) {
        putc(0x1b);
        putc('(');
        putc('B');
    }
    putc(EOF);
    m_in_counter = 0;
    m_out_counter = 0;
    return (LPCTSTR)m_buf;
}
// Converts m_pSrc from EUC to Shift_JIS.
//
// The input is read with getc() and the result is written to m_buf with
// putc():
//   - a pair of bytes that both satisfy iseuc is reduced to a JIS code by
//     clearing the high bits and then converted with jis_to_sjis();
//   - 0x8e followed by a byte that satisfies ishankana is written as that
//     byte alone (the 0x8e prefix is dropped);
//   - tab, LF, CR, form feed and bytes from ' ' up to 0x7e are copied;
//   - every other byte is dropped.
// When the second byte of a pair is not valid, the first byte is dropped and
// the byte that was read in its place is processed on its own.
//
// Returns m_buf (NUL-terminated). Both counters are reset before returning.
//
// NOTE(review): iseuc and ishankana are not defined in this file; presumably
// byte-range macros from CodeConvert.h - confirm.
// NOTE(review): there is no handling for a 0x8f prefix (three-byte EUC
// sequences); if such input can occur the two bytes after the prefix would be
// converted as an ordinary pair - confirm whether that matters for callers.
LPCTSTR CCodeConvert::EucToSjis(void)
{
    int c, d, flag;     // flag != 0: process the current c again, do not read

    flag = 0;
    while(flag || (c = getc()) != EOF){
        flag = 0;
        if(iseuc(c)){
            d = getc();
            if (iseuc(d)){
                c &= 0x7f; d &= 0x7f;   // EUC -> JIS
                jis_to_sjis(&c, &d);
                putc(c); putc(d);
            }else{
                c = d; flag = 1;
            }
        }
        else if(c == 0x8e){
            // Half-width kana prefix: the following byte is already the
            // Shift_JIS code.
            c = getc();
            if (ishankana(c)){
                putc(c);
            }else{
                flag = 1;
            }
        }
        else if (c == '\t' || c == '\n' || c == '\r' ||
                 c == 12 || (c >= ' ' && c < 0x7f))
        {
            putc(c);
        }
    }
    putc(EOF);
    m_in_counter = 0;
    m_out_counter = 0;
    return (LPCTSTR)m_buf;
}
// Returns the source text as JIS.
//
// The encoding is determined with guess(). Text that is already JIS is
// returned as-is (m_pSrc), Shift_JIS text is converted with SjisToJis().
// For any other result of guess(), including EUC, NULL is returned.
LPCTSTR CCodeConvert::ToJis()
{
    const int encoding = guess();

    if (encoding == JIS) {
        return m_pSrc;
    }
    if (encoding == SJIS) {
        return SjisToJis();
    }
    return NULL;
}
// Returns the source text as Shift_JIS.
//
// The encoding is determined with guess(). JIS text is converted with
// JisToSjis(), EUC text with EucToSjis(), and text that is already Shift_JIS
// is returned as-is (m_pSrc). NULL is returned for any other result of
// guess().
LPCTSTR CCodeConvert::ToSjis()
{
    const int encoding = guess();

    if (encoding == JIS) {
        return JisToSjis();
    }
    if (encoding == SJIS) {
        return m_pSrc;
    }
    if (encoding == EUC) {
        return EucToSjis();
    }
    return NULL;
}