root/HeaderInfo.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. CHeaderInfo
  2. CHeaderInfo
  3. CHeaderInfo
  4. DoIt
  5. Analyze
  6. GetAllDecoded
  7. GetAllUnDecoded
  8. GetAllUnDecoded
  9. GetOneLine
  10. RemoveFoldingWhiteSpace
  11. Decode
  12. GetSubject
  13. GetTo
  14. GetCc
  15. GetBcc
  16. GetDate
  17. GetFrom
  18. GetXmailer
  19. GetReplyTo
  20. GetMsgID
  21. GetInReplyTo
  22. GetCTE
  23. GetEnc
  24. GetCT
  25. GetCD
  26. GetBoundary
  27. GetCharset
  28. GetFilename
  29. GetName
  30. GetProtocol
  31. GetXattachment
  32. IsMultipart
  33. ReformDate
  34. AnalyzeCT
  35. AnalyzeCD
  36. GetCType
  37. GetCSubType
  38. InitMemVal

/*
 * Copyright (C) 2002-2003 chik, s.hiranaka
 * For license terms, see the file COPYING in this directory.
 */

// HeaderInfo.cpp: CHeaderInfo クラスのインプリメンテーション
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "Pochy.h"
#include "HeaderInfo.h"
#include "Fetchmail.h"
#include "quoted-printable.h"
#include "CodeConvert.h"
#include "base64.h"
#include "_regex.h"
#include "lib.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// 構築/消滅
//////////////////////////////////////////////////////////////////////

// Builds a header object from a string holding either a whole message or
// just its header section.
//
// buf: raw message text. When it contains an empty line ("\r\n\r\n") only the
//      text up to and including the CRLF that ends the last header line is
//      kept; otherwise the whole string is treated as header text.
CHeaderInfo::CHeaderInfo(CString buf)
{
        // Start from a clean state.
        InitMemVal();

        // A full message was passed in: cut it down to the header section.
        const int header_end = buf.Find("\r\n\r\n");
        if(header_end != -1)
                buf = buf.Left(header_end + strlen("\r\n"));

        // Store the raw header lines (g_cstr2cstra presumably splits on the
        // given "\r\n" separator -- confirm against lib.h).
        g_cstr2cstra(buf, m_undecoded_header, "\r\n");

        Decode();       // unfold lines, decode encoded-words, convert the charset
        Analyze();      // copy the interesting fields into member variables
}

// Default constructor: creates an object with no header loaded.
//
// InitMemVal() is called so that m_enc (which has no initialiser of its own)
// holds SEVEN_BIT instead of an indeterminate value if GetEnc() is called
// before DoIt().
CHeaderInfo::CHeaderInfo()
{
        InitMemVal();
}

// Destructor: intentionally empty; this function performs no explicit cleanup.
CHeaderInfo::~CHeaderInfo()
{

}

// Loads and parses a header supplied as an array of lines.
//
// buf: message lines. Lines are copied until the first element that is
//      exactly "\r\n" (the blank line ending the header) or the end of the
//      array, so a complete message may be passed in.
// Returns FALSE when no line was copied, TRUE otherwise. Decode() and
// Analyze() run in both cases.
BOOL CHeaderInfo::DoIt(CStringArray &buf)
{
        // Start from a clean state.
        InitMemVal();

        // Collect the header lines only.
        int copied = 0;
        for(; copied < buf.GetSize(); copied++){
                if(buf.GetAt(copied) == "\x0D\x0A")
                        break;
                m_undecoded_header.Add(buf.GetAt(copied));
        }

        Decode();       // unfold lines, decode encoded-words, convert the charset
        Analyze();      // copy the interesting fields into member variables

        return (copied != 0) ? TRUE : FALSE;
}

// ヘッダを分析して必要な情報をメンバ変数に格納する
void CHeaderInfo::Analyze()
{
        regex_t reg;
        int pos;
        CString field;
        for(int i = 0; i < m_decoded_header.GetSize(); i++){
                pos = m_decoded_header[i].Find(":");
                if(pos == -1)
                        continue;
                field = m_decoded_header[i].Left(pos);
                regcomp(&reg, field, REG_EXTENDED | REG_NEWLINE | REG_ICASE);

                if(!regexec(&reg, "^subject:", 0, NULL, 0)){
                        regfree(&reg);
                        m_subject = m_decoded_header[i].Mid(strlen("Subject:"));
                        g_cstr_chop(m_subject);
                        continue;
                }
                if(!regexec(&reg, "^cc:", 0, NULL, 0)){
                        regfree(&reg);
                        m_cc = m_decoded_header[i].Mid(strlen("cc:"));
                        g_cstr_chop(m_cc);
                        continue;
                }
                if(!regexec(&reg, "^bcc:", 0, NULL, 0)){
                        regfree(&reg);
                        m_bcc = m_decoded_header[i].Mid(strlen("bcc:"));
                        g_cstr_chop(m_bcc);
                        continue;
                }
                if(!regexec(&reg, "^from:", 0, NULL, 0)){
                        regfree(&reg);
                        m_from = m_decoded_header[i].Mid(strlen("from:"));
                        g_cstr_chop(m_from);
                        continue;
                }
                if(!regexec(&reg, "^to:", 0, NULL, 0)){
                        regfree(&reg);
                        m_to = m_decoded_header[i].Mid(strlen("to:"));
                        g_cstr_chop(m_to);
                        continue;
                }
                if(!regexec(&reg, "^x-mailer:", 0, NULL, 0)){
                        regfree(&reg);
                        m_xmailer = m_decoded_header[i].Mid(strlen("x-mailer:"));
                        g_cstr_chop(m_xmailer);
                        continue;
                }
                if(!regexec(&reg, "^reply-to:", 0, NULL, 0)){
                        regfree(&reg);
                        m_replyto = m_decoded_header[i].Mid(strlen("reply-to:"));
                        g_cstr_chop(m_replyto);
                        continue;
                }
                if(!regexec(&reg, "^date:", 0, NULL, 0)){
                        regfree(&reg);
                        m_date = ReformDate(m_decoded_header[i]);
                        continue;
                }
                if(!regexec(&reg, "^content-transfer-encoding:", 0, NULL, 0)){
                        regfree(&reg);
                        CString cte;
                        m_cte = m_decoded_header[i];
                        m_cte.TrimRight("\r\n");
                        cte = m_decoded_header[i].Mid(strlen("Content-Transfer-Encoding:"));
                        g_cstr_chop(cte);
                        if(g_cstr_compare(cte.GetBuffer(0), "7bit"))
                                m_enc = SEVEN_BIT;
                        if(g_cstr_compare(cte.GetBuffer(0), "8bit"))
                                m_enc = EIGHT_BIT;
                        if(g_cstr_compare(cte.GetBuffer(0), "bin"))
                                m_enc = BIN;
                        if(g_cstr_compare(cte.GetBuffer(0), "base64"))
                                m_enc = BASE64;
                        if(g_cstr_compare(cte.GetBuffer(0), "x-gzip64")) // x-gzip64は取りあえずbase64にしとく(zlib.dllが必要だそうな)
                                m_enc = BASE64;
                        if(g_cstr_compare(cte.GetBuffer(0), "quoted-printable"))
                                m_enc = QUOTED_PRINTABLE;
                        continue;
                }
                if(!regexec(&reg, "^message-id:", 0, NULL, 0)){
                        regfree(&reg);
                        m_msg_id = m_decoded_header[i].Mid(strlen("Message-ID:"));
                        g_cstr_chop(m_msg_id);
                        continue;
                }
                if(!regexec(&reg, "^in-reply-to:", 0, NULL, 0)){
                        regfree(&reg);
                        m_in_reply_to = m_decoded_header[i].Mid(strlen("In-Reply-To:"));
                        g_cstr_chop(m_in_reply_to);
                        continue;
                }
                if(!regexec(&reg, "^content-type:", 0, NULL, 0)){
                        regfree(&reg);
                        m_ct = m_decoded_header[i];
                        g_cstr_chop(m_ct);
                        AnalyzeCT();
                        continue;
                }
                if(!regexec(&reg, "^content-disposition:", 0, NULL, 0)){
                        regfree(&reg);
                        m_cd = m_decoded_header[i];
                        g_cstr_chop(m_cd);
                        AnalyzeCD();
                        continue;
                }
                regfree(&reg);
        }
        /* Content-Typeが存在しない場合は、
        Content-Type: Text/Plain; charset=US-ASCII
        Content-Transfer-Encoding: 7bit
        にする */
        if(m_type.IsEmpty())
                m_type = "Text/Plain";
        if(m_charset.IsEmpty())
                m_charset = "US-ASCII ";
        // InitMemVal()でm_cteは0(7bit)に設定済み
}

// Returns the decoded header as one string, built from every entry of
// m_decoded_header by g_cstra2cstr (presumably a join -- see lib.h).
CString CHeaderInfo::GetAllDecoded()
{
        CString joined;
        g_cstra2cstr(m_decoded_header,
                     joined,
                     g_cstra_getsize(m_decoded_header));

        return joined;
}

// Returns the raw (undecoded) header as one string, built from every entry of
// m_undecoded_header by g_cstra2cstr (presumably a join -- see lib.h).
CString CHeaderInfo::GetAllUnDecoded()
{
        CString joined;
        g_cstra2cstr(m_undecoded_header,
                     joined,
                     g_cstra_getsize(m_undecoded_header));

        return joined;
}

// Copies the raw (undecoded) header lines into the caller's array.
// header: output array; its previous contents are replaced by the copy.
void CHeaderInfo::GetAllUnDecoded(CStringArray &header)
{
        header.Copy(m_undecoded_header);
}

// Returns the first line of m_decoded_header that starts with `key`
// (case-insensitive), or an empty string when no line matches.
//
// key: text expected at the start of the line, e.g. "Subject:". It is inserted
//      into a regular expression as-is, so regex metacharacters in it keep
//      their special meaning. If the resulting pattern does not compile, an
//      empty string is returned.
CString CHeaderInfo::GetOneLine(LPCTSTR key)
{
        // Build "^<key>.*$" in a CString: no manual buffer to size or free, and
        // no wsprintf output-length limit.
        CString pattern("^");
        pattern += key;
        pattern += ".*$";

        regex_t reg;
        if(0 != regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE | REG_ICASE))
                return CString();       // unusable pattern: treat as "not found"

        CString found;                  // stays empty when nothing matches
        for(int n = 0; n < m_decoded_header.GetSize(); n++){
                CString line = m_decoded_header.GetAt(n);
                if(0 == regexec(&reg, line, 0, NULL, 0)){
                        found = line;
                        break;
                }
        }

        regfree(&reg);
        return found;
}

// Removes folding white space from a mail header: every line that begins with
// a space or a tab is a continuation and is merged into the line before it.
//
// header: array of header lines, modified in place. For each continuation
//         line the leading spaces/tabs are dropped, the trailing CR/LF of the
//         previous line is dropped, and the two are concatenated with nothing
//         in between.
//
// A continuation on the very first line has no line to be merged into and is
// left unchanged (earlier code indexed element -1 in that case). Spaces and
// tabs are handled together in one pass, so a line that starts with a mix of
// both is stripped completely.
void CHeaderInfo::RemoveFoldingWhiteSpace(CStringArray *header)
{
        if(header == NULL)
                return;

        // Line 0 has no predecessor, so scanning starts at line 1.
        int n = 1;
        while(n < header->GetSize()){
                CString cont = header->GetAt(n);
                if(cont.IsEmpty() || (cont[0] != ' ' && cont[0] != '\t')){
                        n++;
                        continue;
                }

                cont.TrimLeft(" \t");

                CString prev = header->GetAt(n - 1);
                prev.TrimRight("\r\n");
                prev += cont;

                header->SetAt(n - 1, prev);
                header->RemoveAt(n, 1);
                // n is not advanced: the next line has moved into slot n and may be
                // a further continuation of the same field.
        }
}

// メールヘッダ中のbase64、quoted-printableをデコードし、folding white spaceを取り除き、文字コードの変換も行う
void CHeaderInfo::Decode()
{
        
        // 正規表現関数用
        regex_t reg; // 正規表現格納用
        regmatch_t pmatch[5]; // regexecの結果が格納される
        char *regex = "=\\?([^\\?]+)\\?([BQbq])\\?([^\\?]*)\\?="; // =?ISO-2022-JP?B?hogehogehoge?=の為の正規表現
        regcomp(&reg, regex, REG_EXTENDED | REG_NEWLINE);

        CString temp1;
        CString temp2;
        CString buf;
        CString encode_type;    // B(base64)Q(quoted-printable)とか
        CString char_type;              // ISO-2022-JPとか
        CString char_encoded;   // encodeされた部分
        int len;
        int n = 0;
        CStringArray header;

        // バッファ用CStringArrayの初期化とデータのコピー
        header.RemoveAll();
        header.Copy(m_undecoded_header);

        // folding white space を取り除く
        RemoveFoldingWhiteSpace(&header);

        while(n < header.GetSize()){
                buf = header.GetAt(n);
                temp1.Empty();
                temp2.Empty();

                // 正規表現でbase64、quoted-printableでencodeされた部分をを引っ掛けてきて、decodeする
                while(1){
                        if(buf.Find("?")==-1)// できればregexec()やるまえにbreakしたい。
                                break;

                        if(0 != regexec(&reg, buf, 4, pmatch, 0))
                                break;

                        temp1.Empty();
                        temp2.Empty();
                        temp1                   +=      buf.Left(pmatch[0].rm_so);
                        encode_type             =       buf.Mid(pmatch[2].rm_so, pmatch[2].rm_eo-pmatch[2].rm_so);
                        char_type               =       buf.Mid(pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so);
                        char_encoded    =       buf.Mid(pmatch[3].rm_so, pmatch[3].rm_eo-pmatch[3].rm_so);

                        if(encode_type == "B" || encode_type == "b"){
                                len = from64tobits(temp2.GetBuffer(char_encoded.GetLength()), char_encoded.GetBuffer(0), char_encoded.GetLength());
                                temp2.ReleaseBuffer(len);
                                char_encoded.ReleaseBuffer();
                                temp1 += temp2;
                                temp2.Empty();
                        }

                        if(encode_type == "Q" || encode_type == "q"){
                                len = fromQPtobits(temp2.GetBuffer(char_encoded.GetLength()), char_encoded.GetBuffer(0), char_encoded.GetLength());
                                temp2.ReleaseBuffer(len);
                                char_encoded.ReleaseBuffer();
                                temp1 += temp2;
                                temp2.Empty();
                        }
                        temp1           +=      buf.Mid(pmatch[0].rm_eo);
                        buf                     =       temp1;
                }
                // 文字コードの変換
                CCodeConvert cc(buf);
                buf = cc.ToSjis();
                m_decoded_header.Add(buf);
                n++;
        }
        regfree(&reg);

}

// Returns the Subject value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetSubject()
{
        return m_subject;
}

// Returns the To value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetTo()
{
        return m_to;
}

// Returns the Cc value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetCc()
{
        return m_cc;
}

// Returns the Bcc value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetBcc()
{
        return m_bcc;
}


// Returns the date produced by ReformDate() for the Date field: text in
// "%Y/%m/%d %H:%M:%S" form, or empty if there was no usable Date field.
CString CHeaderInfo::GetDate()
{
        return m_date;
}

// Returns the From value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetFrom()
{
        return m_from;
}

// Returns the X-Mailer value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetXmailer()
{
        return m_xmailer;
}
// Returns the Reply-To value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetReplyTo()
{
        return m_replyto;
}

// Returns the Message-ID value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetMsgID()
{
        return m_msg_id;
}

// Returns the In-Reply-To value stored by Analyze() (empty if no such field was seen).
CString CHeaderInfo::GetInReplyTo()
{
        return m_in_reply_to;
}

// Returns the complete Content-Transfer-Encoding header line (field name
// included, trailing CR/LF removed) as stored by Analyze(); empty if absent.
CString CHeaderInfo::GetCTE()
{
        return m_cte;
}

// Returns the transfer-encoding code chosen by Analyze(): one of SEVEN_BIT,
// EIGHT_BIT, BIN, BASE64 or QUOTED_PRINTABLE. InitMemVal() presets SEVEN_BIT.
int CHeaderInfo::GetEnc()
{
        return m_enc;
}

// Returns the complete Content-Type header line (field name included) as
// stored by Analyze() after g_cstr_chop(); empty if absent.
CString CHeaderInfo::GetCT()
{
        return m_ct;
}

// Returns the complete Content-Disposition header line (field name included)
// as stored by Analyze() after g_cstr_chop(); empty if absent.
CString CHeaderInfo::GetCD()
{
        return m_cd;
}

// Returns the boundary parameter extracted from Content-Type by AnalyzeCT()
// (empty if there was none).
CString CHeaderInfo::GetBoundary()
{
        return m_boundary;
}

// Returns the charset parameter extracted from Content-Type by AnalyzeCT().
// Analyze() substitutes the literal "US-ASCII " (with a trailing space) when
// no charset was found.
CString CHeaderInfo::GetCharset()
{
        return m_charset;
}

// Returns the filename parameter extracted from Content-Disposition by
// AnalyzeCD() (empty if there was none).
CString CHeaderInfo::GetFilename()
{
        return m_filename;
}

// Returns the name parameter extracted from Content-Type by AnalyzeCT()
// (empty if there was none).
CString CHeaderInfo::GetName()
{
        return m_name;
}

// Returns the protocol parameter extracted from Content-Type by AnalyzeCT()
// (used for PGP/MIME parts; empty if there was none).
CString CHeaderInfo::GetProtocol()
{
        return m_protocol;
}

// Returns m_xattachment.
// NOTE(review): within this file m_xattachment is only cleared by
// InitMemVal(); no code here assigns it, so this appears to always be empty.
CString CHeaderInfo::GetXattachment()
{
        return m_xattachment;
}

// Reports whether the message is treated as multipart: TRUE exactly when a
// boundary parameter was extracted from Content-Type, FALSE otherwise.
BOOL CHeaderInfo::IsMultipart()
{
        return m_boundary.IsEmpty() ? FALSE : TRUE;
}

CString CHeaderInfo::ReformDate(CString buf)
{
        CString temp;
        CString day;
        CString month;
        CString year;
        CString hour;
        CString minute;
        CString second;
        CString zone;

        regex_t reg; // 正規表現格納用
        regmatch_t pmatch[10]; // regexecの結果が格納される

        char *regex = "Date:[ \t]*([a-zA-Z]+)*,* *([0-9]+) +([a-zA-Z]+) +([0-9]+) +([0-9]+):([0-9]+):*([0-9]*) +([+-0-9a-zA-Z]+)";
//              Date:     Mon, 3 Feb 2003 00:00:43 -0600
//              Date: Sat, 29 Mar 2003 05:01:48 +0900
        regcomp(&reg, regex, REG_EXTENDED | REG_NEWLINE | REG_ICASE);
        if(0 == regexec(&reg, buf, 9, pmatch, 0)){
                year = buf.Mid(pmatch[4].rm_so, pmatch[4].rm_eo-pmatch[4].rm_so);
                month = buf.Mid(pmatch[3].rm_so, pmatch[3].rm_eo-pmatch[3].rm_so);
                day = buf.Mid(pmatch[2].rm_so, pmatch[2].rm_eo-pmatch[2].rm_so);
                hour = buf.Mid(pmatch[5].rm_so, pmatch[5].rm_eo-pmatch[5].rm_so);
                minute = buf.Mid(pmatch[6].rm_so, pmatch[6].rm_eo-pmatch[6].rm_so);
                second = buf.Mid(pmatch[7].rm_so, pmatch[7].rm_eo-pmatch[7].rm_so);
                zone = buf.Mid(pmatch[8].rm_so, pmatch[8].rm_eo-pmatch[8].rm_so);
        }
        regfree(&reg);

        if(month.Find("Apr") != -1) month = "04";
        else if(month.Find("May") != -1) month = "05";
        else if(month.Find("Jun") != -1) month = "06";
        else if(month.Find("Jul") != -1) month = "07";
        else if(month.Find("Aug") != -1) month = "08";
        else if(month.Find("Sep") != -1) month = "09";
        else if(month.Find("Oct") != -1) month = "10";
        else if(month.Find("Nov") != -1) month = "11";
        else if(month.Find("Dec") != -1) month = "12";
        else if(month.Find("Jan") != -1) month = "01";
        else if(month.Find("Feb") != -1) month = "02";
        else if(month.Find("Mar") != -1) month = "03";
        else{
                buf.Empty();
                return buf;
        }

        // タイムゾーンの調整
        CTime t(atoi(year), atoi(month), atoi(day), atoi(hour), atoi(minute), atoi(second));
        CString dif_plus_minus;
        CString dif_hour;
        CString dif_minute;
        CString loc_plus_minus;
        CString loc_hour;
        CString loc_minute;
        CString loc_zone;
        // たぶんタイムゾーン名の場合(オフセットではなくて)
        if(!zone.SpanExcluding("0123456789+-").IsEmpty()){
                zone = g_get_tzoffset_from_tzname(zone);
        }
        dif_plus_minus = zone[0];
        dif_hour = zone.Mid(1, 2);
        dif_minute = zone.Mid(3, 2);
        // とりあえずGMTに直す
        if(dif_plus_minus == "-"){
                t += CTimeSpan(0, atoi(dif_hour), atoi(dif_minute), 0);
        }else if(dif_plus_minus == "+"){
                t -= CTimeSpan(0, atoi(dif_hour), atoi(dif_minute), 0);
        }
        // 日本時間に直す(タイムゾーンを取得してくる必要)
        loc_zone = DEF_TIME_ZONE;
        loc_plus_minus = loc_zone[0];
        loc_hour = loc_zone.Mid(1, 2);
        loc_minute = loc_zone.Mid(3, 2);
        if(loc_plus_minus == "-"){
                t -= CTimeSpan(0, atoi(loc_hour), atoi(loc_minute), 0);
        }else if(loc_plus_minus == "+"){
                t += CTimeSpan(0, atoi(loc_hour), atoi(loc_minute), 0);
        }

//      t += CTimeSpan(0, 9, 0, 0); // 現行では+900に決めうち

        return t.Format("%Y/%m/%d %H:%M:%S");
}

void CHeaderInfo::AnalyzeCT()
{
        regex_t reg; // 正規表現格納用
        regmatch_t pmatch[2]; // regexecの結果が格納される

        // type/subtype
        char *regex = "^content-type: *([^;\r\n]+).*$";
        regcomp(&reg, regex, REG_EXTENDED | REG_NEWLINE | REG_ICASE);
        if(0 == regexec(&reg, m_ct, 2, pmatch, 0)){
                m_type = m_ct.Mid(pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so);
        }
        regfree(&reg);

        // boundary
        regex = "boundary= *\"*([^\";\r\n]+)\"*";
        regcomp(&reg, regex, REG_EXTENDED | REG_NEWLINE | REG_ICASE);
        if(0 == regexec(&reg, m_ct, 2, pmatch, 0)){
                m_boundary = m_ct.Mid(pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so);
        }
        regfree(&reg);

        // charset
        regex = "charset= *\"*([^\";\r\n]+)\"*";
        regcomp(&reg, regex, REG_EXTENDED | REG_NEWLINE | REG_ICASE);
        if(0 == regexec(&reg, m_ct, 2, pmatch, 0)){
                m_charset = m_ct.Mid(pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so);
        }
        regfree(&reg);

        // name
        regex = "name= *\"*([^\";\r\n]+)\"*";
        regcomp(&reg, regex, REG_EXTENDED | REG_NEWLINE | REG_ICASE);
        if(0 == regexec(&reg, m_ct, 2, pmatch, 0)){
                m_name = m_ct.Mid(pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so);
        }
        regfree(&reg);

        // protocol pgp/mimeの場合
        regex = "protocol= *\"*([^\";\r\n]+)\"*";
        regcomp(&reg, regex, REG_EXTENDED | REG_NEWLINE | REG_ICASE);
        if(0 == regexec(&reg, m_ct, 2, pmatch, 0)){
                m_protocol = m_ct.Mid(pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so);
        }
        regfree(&reg);
}

void CHeaderInfo::AnalyzeCD()
{
        regex_t reg; // 正規表現格納用
        regmatch_t pmatch[2]; // regexecの結果が格納される

        // filename
        char *regex     = "filename= *\"*([^;\"\r\n]+)\"*";
        regcomp(&reg, regex, REG_EXTENDED | REG_NEWLINE | REG_ICASE);
        if(0 == regexec(&reg, m_cd, 2, pmatch, 0)){
                m_filename = m_cd.Mid(pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so);
        }
        regfree(&reg);
}

// Returns the media type text taken from Content-Type by AnalyzeCT() (the
// part before the first ';', e.g. "text/plain"). Analyze() substitutes
// "Text/Plain" when no Content-Type was found.
CString CHeaderInfo::GetCType()
{
        return m_type;
}

// Returns m_subtype.
// NOTE(review): within this file m_subtype is only cleared by InitMemVal();
// AnalyzeCT() stores the whole "type/subtype" text in m_type and never
// assigns m_subtype, so this appears to always be empty.
CString CHeaderInfo::GetCSubType()
{
        return m_subtype;
}

// Resets every member to its "nothing parsed yet" state: all strings and both
// line arrays are emptied and the transfer encoding is set to SEVEN_BIT.
void CHeaderInfo::InitMemVal()
{
        // Raw and decoded header lines.
        m_undecoded_header.RemoveAll();
        m_decoded_header.RemoveAll();

        // Addressing and identification fields.
        m_from.Empty();
        m_to.Empty();
        m_cc.Empty();
        m_bcc.Empty();
        m_replyto.Empty();
        m_subject.Empty();
        m_date.Empty();
        m_msg_id.Empty();
        m_in_reply_to.Empty();
        m_xmailer.Empty();

        // Content-Type and its parameters.
        m_ct.Empty();
        m_type.Empty();
        m_subtype.Empty();
        m_charset.Empty();
        m_boundary.Empty();
        m_name.Empty();
        m_protocol.Empty();

        // Content-Disposition and attachment information.
        m_cd.Empty();
        m_filename.Empty();
        m_xattachment.Empty();

        // Content-Transfer-Encoding: header text and the derived code.
        m_cte.Empty();
        m_enc = SEVEN_BIT;      // 7bit is the default
}

/* [<][>][^][v][top][bottom][index][help] */