root/CodeConvert.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. CCodeConvert
  2. CCodeConvert
  3. guess
  4. sjis_to_jis
  5. jis_to_sjis
  6. getc
  7. putc
  8. JisToSjis
  9. SjisToJis
  10. EucToSjis
  11. ToJis
  12. ToSjis

/*
 * Copyright (C) 2002-2003 chik, s.hiranaka
 * For license terms, see the file COPYING in this directory.
 */

// CodeConvert.cpp: implementation of the CCodeConvert class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "Pochy.h"
#include "CodeConvert.h"
//#include "iconv.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

// Encoding identifiers returned by CCodeConvert::guess() and dispatched on
// by ToJis() / ToSjis().
#define JIS             1       // escape-sequence based (ESC $ ... / ESC ( ...) text
#define SJIS    2       // Shift_JIS
#define EUC             3       // EUC

// Binds the converter to Src and allocates the output buffer used by the
// conversion routines.
//
// Src: text to convert. Only a pointer obtained from Src.GetBuffer(0) is
//      stored (no copy is made), so Src presumably has to stay alive and
//      unmodified while this object is in use -- confirm against callers.
CCodeConvert::CCodeConvert(CString &Src)
{
        int length;

        m_pSrc = Src.GetBuffer(0);
        m_in_counter = 0;
        m_out_counter = 0;

        // putc() performs no bounds checking, so the buffer has to cover the
        // worst case of every converter. That worst case is SjisToJis(): a
        // single input byte that forces a mode change is written as a 3-byte
        // escape sequence plus the byte itself (4 output bytes per input
        // byte), and at the end up to 3 bytes of closing escape sequence and
        // the terminating NUL are appended. The previous "length * 2" sizing
        // could therefore be overrun by large inputs that keep alternating
        // between half-width kana and ASCII.
        length = Src.GetLength() * 4 + 4;
        if(length < 64*1024) length = 64*1024;

        m_buf = new char [length];
}

// Releases the output buffer allocated by the constructor.
CCodeConvert::~CCodeConvert()
{
        // delete [] on a null pointer is a no-op, so no explicit check is needed.
        delete [] m_buf;
}

// Guesses the encoding of the source text.
//
// Returns JIS as soon as one of the escape sequences ESC $ @, ESC $ B,
// ESC ( B, ESC ( J or ESC ( I is found. Otherwise the text is scored twice,
// once as EUC and once as Shift_JIS, and the encoding that collected fewer
// penalty points is returned (SJIS on a tie). Only the first 8 KB of the
// text are inspected.
int CCodeConvert::guess()
{
        int i, len, esc_max, max, bad_euc, bad_sjis;

        len = lstrlen(m_pSrc);

        // The escape scan reads m_pSrc[i+1] and m_pSrc[i+2], so it has to stop
        // two bytes before the end of the text.
        esc_max = len - 2;
        if(esc_max > 8*1024) esc_max = 8*1024;

        // The scoring loops guard every look-ahead with "++i < max" and read
        // at most the terminating NUL beyond that, so they may cover the whole
        // text. Reusing the "len - 2" limit here left the last two bytes
        // unscored, which made every one- or two-byte text come out as SJIS.
        max = len;
        if(max > 8*1024) max = 8*1024;

        for(i = 0; i < esc_max; i++){
                if((unsigned char)m_pSrc[i] == 0x1b){
                        if((unsigned char)m_pSrc[i+1] == '$'){
                                if((unsigned char)m_pSrc[i+2] == '@' || (unsigned char)m_pSrc[i+2] =='B'){
                                        return JIS;
                                }
                        }
                        else if((unsigned char)m_pSrc[i+1] == '('){
                                if((unsigned char)m_pSrc[i+2] == 'B' || (unsigned char)m_pSrc[i+2] == 'J' || (unsigned char)m_pSrc[i+2] == 'I'){
                                        return JIS;
                                }
                        }
                }
        }

        // Score the text as EUC: 10 points for every byte that cannot be part
        // of a valid sequence, 1 point for sequences that are valid but get a
        // small penalty (lead byte >= 0xd0, or a 0x8e-prefixed half-width kana).
        bad_euc = 0;
        for(i = 0; i < max; i++){
                if(iseuc((unsigned char)m_pSrc[i]) && ++i < max){
                        if(!iseuc((unsigned char)m_pSrc[i])){
                                bad_euc += 10;
                                i--;    // re-examine this byte as the start of a new sequence
                        }
                        else if((unsigned char)m_pSrc[i-1] >= 0xd0)
                                bad_euc++;
                }
                else if((unsigned char)m_pSrc[i] == 0x8e && ++i < max){
                        if(ishankana((unsigned char)m_pSrc[i]))
                                bad_euc++;
                        else{
                                bad_euc += 10;
                                i--;    // re-examine this byte as the start of a new sequence
                        }
                }
                else if((unsigned char)m_pSrc[i] >= 0x80)
                        bad_euc += 10;
        }

        // Score the text as Shift_JIS with the same weights: 10 points for an
        // invalid byte, 1 point for a double-byte code >= 0x989f or a
        // half-width kana byte.
        bad_sjis = 0;
        for(i = 0; i < max; i++){
                if(issjis1((unsigned char)m_pSrc[i]) && ++i < max){
                        if(! issjis2((unsigned char)m_pSrc[i])){
                                bad_sjis += 10;
                                i--;    // re-examine this byte as the start of a new sequence
                        }
                        else if((unsigned) ((unsigned char)m_pSrc[i-1] * 256U + (unsigned char)m_pSrc[i]) >= 0x989f)
                                bad_sjis++;
                }
                else if ((unsigned char)m_pSrc[i] >= 0x80){
                        if(ishankana((unsigned char)m_pSrc[i])) bad_sjis++;
                        else bad_sjis += 10;
                }
        }

        if(bad_sjis <= bad_euc) return SJIS;
        else return EUC;
}

// Converts one double-byte Shift_JIS character to its JIS code in place.
//
// ph: in: Shift_JIS lead byte,  out: JIS first byte
// pl: in: Shift_JIS trail byte, out: JIS second byte
void CCodeConvert::sjis_to_jis(int *ph, int *pl)
{
        // Each Shift_JIS lead byte covers two JIS rows; a trail byte of 0x9f
        // or above selects the second row of the pair.
        const int row_base = (*ph <= 0x9f) ? 0xe1 : 0x161;
        const int second_row = (*pl < 0x9f) ? 0 : 1;
        *ph = (*ph << 1) - row_base + second_row;

        // Map the trail byte back into the JIS column range.
        int trail = *pl;
        if(trail >= 0x9f) trail -= 0x7e;
        else if(trail >= 0x7f) trail -= 0x20;
        else trail -= 0x1f;
        *pl = trail;
}

// Converts one double-byte JIS character to its Shift_JIS code in place.
//
// ph: in: JIS first byte,  out: Shift_JIS lead byte
// pl: in: JIS second byte, out: Shift_JIS trail byte
void CCodeConvert::jis_to_sjis(int *ph, int *pl)
{
        // The trail byte offset depends on whether the JIS first byte is odd
        // or even, so it is computed before the first byte is rewritten.
        int trail_offset;
        if((*ph & 1) == 0) trail_offset = 0x7e;
        else trail_offset = (*pl < 0x60) ? 0x1f : 0x20;
        *pl += trail_offset;

        const int lead_offset = (*ph < 0x5f) ? 0xe1 : 0x161;
        *ph = (*ph + lead_offset) >> 1;
}

// Reads the next byte of the source text.
//
// Returns the byte as a value in 0..255 and advances the read position, or
// returns EOF without advancing once the terminating NUL is reached.
int CCodeConvert::getc()
{
        if(m_pSrc[m_in_counter] != '\0'){
                return (int)(unsigned char)m_pSrc[m_in_counter++];
        }
        return EOF;
}

// Appends one byte to the output buffer and advances the write position.
//
// c: byte to store; EOF stores the terminating NUL instead.
// Returns EOF when a NUL byte was stored, TRUE otherwise.
// No bounds checking is done on m_buf.
int CCodeConvert::putc(int c)
{
        const char stored = (c == EOF) ? '\0' : (char)c;

        m_buf[m_out_counter++] = stored;

        return (stored == '\0') ? EOF : TRUE;
}

// Converts the source text from escape-sequence based JIS to Shift_JIS.
//
// The result is written NUL-terminated into m_buf and a pointer to m_buf is
// returned; both counters are reset so another conversion can follow.
// Bytes that are neither part of a recognised sequence nor tab, LF, CR,
// form feed or printable ASCII are dropped.
LPCTSTR CCodeConvert::JisToSjis()
{
        enum {NORMAL, KANJI, HANKANA} mode = NORMAL;
        int c = 0, d;
        bool reprocess = false; // true: handle the current c again instead of reading

        for(;;){
                if(!reprocess){
                        c = getc();
                        if(c == EOF) break;
                }
                reprocess = false;

                // Escape sequences select the character set of what follows.
                if(c == 0x1b){
                        c = getc();
                        if(c == '$'){
                                c = getc();
                                if(c == '@' || c == 'B') mode = KANJI;
                        }
                        else if(c == '('){
                                c = getc();
                                if(c == 'B' || c == 'J') mode = NORMAL;
                                else if(c == 'I') mode = HANKANA;
                        }
                        continue;
                }

                // 0x0e / 0x0f switch half-width kana on and off.
                if(c == 0x0e){
                        mode = HANKANA;
                        continue;
                }
                if(c == 0x0f){
                        mode = NORMAL;
                        continue;
                }

                if(mode == KANJI && isjis(c)){
                        d = getc();
                        if(isjis(d)){
                                jis_to_sjis(&c, &d);
                                putc(c);
                                putc(d);
                        }else{
                                // Incomplete pair: drop the first byte and
                                // look at the second one on its own.
                                c = d;
                                reprocess = true;
                        }
                        continue;
                }

                if(mode == HANKANA){
                        if(c >= 0x20 && c <= 0x5f){
                                putc(c | 0x80);
                        }else{
                                // Not a kana code: fall back to NORMAL and
                                // handle the same byte again.
                                mode = NORMAL;
                                reprocess = true;
                        }
                        continue;
                }

                if(c == '\t' || c == '\n' || c == '\r' ||
                        c == 12 || (c >= ' ' && c < 0x7f))
                {
                        putc(c);
                }
        }
        putc(EOF);

        m_in_counter = 0;
        m_out_counter = 0;
        return (LPCTSTR)m_buf;

        // Note: two disabled, iconv-based variants of this conversion used to
        // follow here as commented-out code; they were never compiled.
}

// Converts the source text from Shift_JIS to escape-sequence based JIS.
//
// ESC $ B is emitted before double-byte characters, ESC ( I before
// half-width kana and ESC ( B before everything else, each only when the
// mode actually changes. The output always ends in ESC ( B mode. The result
// is written NUL-terminated into m_buf and a pointer to m_buf is returned;
// both counters are reset afterwards.
LPCTSTR CCodeConvert::SjisToJis(void)
{
        enum {NORMAL, KANJI, HANKANA} mode = NORMAL;
        int c = 0, d;
        bool reprocess = false; // true: handle the current c again instead of reading

        for(;;){
                if(!reprocess){
                        c = getc();
                        if(c == EOF) break;
                }
                reprocess = false;

                if(issjis1(c)){
                        d = getc();
                        if(!issjis2(d)){
                                // Incomplete pair: drop the lead byte and
                                // look at the second byte on its own.
                                c = d;
                                reprocess = true;
                                continue;
                        }
                        sjis_to_jis(&c, &d);
                        if(mode != KANJI){
                                mode = KANJI;
                                putc(0x1b); putc('$'); putc('B');
                        }
                        putc(c);
                        putc(d);
                        continue;
                }

                if(ishankana(c)){
                        if(mode != HANKANA){
                                mode = HANKANA;
                                putc(0x1b); putc('('); putc('I');
                        }
                        putc(c & 0x7f);
                        continue;
                }

                // Everything else is handled in NORMAL mode; bytes other than
                // tab, LF, CR, form feed and printable ASCII are dropped.
                if(mode != NORMAL){
                        putc(0x1b); putc('('); putc('B');
                        mode = NORMAL;
                }
                if(c == '\t' || c == '\n' || c == '\r' ||
                        c == 12 || (c >= ' ' && c < 0x7f))
                {
                        putc(c);
                }
        }

        // Leave the output in NORMAL mode.
        if(mode != NORMAL){
                putc(0x1b); putc('('); putc('B');
        }
        putc(EOF);

        m_in_counter = 0;
        m_out_counter = 0;

        return (LPCTSTR)m_buf;
}

// Converts the source text from EUC to Shift_JIS.
//
// - Two-byte sequences are converted through their JIS code.
// - 0x8e (SS2) followed by a half-width kana byte yields that kana byte.
// - 0x8f (SS3) followed by two EUC bytes is a three-byte supplementary
//   kanji that Shift_JIS cannot represent; it is replaced by the geta mark
//   (Shift_JIS 0x81 0xAC). Without this case the 0x8f was dropped and the
//   following two bytes were decoded as an unrelated two-byte character.
// - Tab, LF, CR, form feed and printable ASCII are copied; other bytes are
//   dropped.
//
// The result is written NUL-terminated into m_buf and a pointer to m_buf is
// returned; both counters are reset afterwards.
LPCTSTR CCodeConvert::EucToSjis(void)
{
        int c, d, flag;

        flag = 0;       // non-zero: handle the current c again instead of reading
        while(flag || (c = getc()) != EOF){
                flag = 0;

                if(iseuc(c)){
                        d = getc();

                        if (iseuc(d)){
                                c &= 0x7f; d &= 0x7f;   // EUC -> JIS
                                jis_to_sjis(&c, &d);
                                putc(c); putc(d);
                        }else{
                                // Incomplete pair: drop the first byte and
                                // look at the second one on its own.
                                c = d; flag = 1;
                        }
                }
                else if(c == 0x8e){
                        c = getc();
                        if (ishankana(c)){
                                putc(c);
                        }else{
                                flag = 1;
                        }
                }
                else if(c == 0x8f){
                        c = getc();
                        if (iseuc(c)){
                                d = getc();
                                if (iseuc(d)){
                                        // Unrepresentable in Shift_JIS: emit the geta mark.
                                        putc(0x81); putc(0xac);
                                }else{
                                        c = d; flag = 1;
                                }
                        }else{
                                flag = 1;
                        }
                }
                else if (c == '\t' || c == '\n' || c == '\r' ||
                                   c == 12 || (c >= ' ' && c < 0x7f))
                {
                        putc(c);
                }
        }
        putc(EOF);

        m_in_counter = 0;
        m_out_counter = 0;

        return (LPCTSTR)m_buf;
}

// Returns the source text as JIS.
//
// Text guessed to be JIS already is returned unchanged (m_pSrc); Shift_JIS
// text is converted through SjisToJis(). For any other guess (including
// EUC) NULL is returned.
LPCTSTR CCodeConvert::ToJis()
{
        const int encoding = guess();

        if(encoding == JIS) return m_pSrc;
        if(encoding == SJIS) return SjisToJis();
        return NULL;
}

// Returns the source text as Shift_JIS.
//
// Text guessed to be Shift_JIS already is returned unchanged (m_pSrc); JIS
// and EUC text is converted through JisToSjis() / EucToSjis(). For any other
// guess NULL is returned.
LPCTSTR CCodeConvert::ToSjis()
{
        const int encoding = guess();

        if(encoding == JIS) return JisToSjis();
        if(encoding == SJIS) return m_pSrc;
        if(encoding == EUC) return EucToSjis();
        return NULL;
}

/* [<][>][^][v][top][bottom][index][help] */