「文字コードの変換」の編集履歴(バックアップ)一覧に戻る

文字コードの変換 - (2010/05/26 (水) 14:21:09) のソース

-UTF82SJIS
[[http://blog.goo.ne.jp/xmldtp/e/7eaeeb3dabfe975dbc57f73aefb1c059]]からコピペ

 #N88BASIC
 
 Function utf8ToSjis(utf8 As *Byte) As *Byte
 	Dim utf16 As *Word
 	Dim wlen As Long
 	Dim sjis As *Byte
 
 	utf8ToSjis = NULL
 	If utf8 = NULL Then
 		Exit Function
 	End If
 	
 	'=============================='
 	'	UTF8=>UTF16変換	    '
 	'=============================='
 	'	まずは、wlenの長さをとる
    wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0)
     If wlen = 0 Then
 			Exit Function
 	End If
 
 	'	utf16の領域確保
 	utf16 = calloc(wlen + 1)
 	If utf16 = NULL Then
 		Exit Function
 	End If
 
 	'	utf16変換
 	If MultiByteToWideChar(CP_UTF8, 0, utf8, -1, utf16, wlen) <= 0 Then
 		free(utf16)
 		Exit Function
 	End If
 
 	'=============================='
 	'	UTF16=>ShiftJIS変換   '
 	'=============================='
 	'	まずは、長さを取得
 	Dim mlen As Long
 	
 	mlen = WideCharToMultiByte(CP_ACP,0,utf16,-1,NULL,0," ",NULL)
 	If mlen = 0 Then
 		free(utf16)
 		Exit Function
 	End If
 
 	'	領域確保
 	sjis = calloc(mlen + 1)
 	If sjis = NULL Then
 		free(utf16)
 		Exit Function
 	End If
 
 	'	utf16変換
 	If WideCharToMultiByte(CP_ACP,0,utf16,-1,sjis,mlen," ",NULL) <= 0 Then
 		free(utf16)
 		free(sjis)
 		Exit Function
 	End If
 
 	free(utf16)
 	utf8ToSjis = sjis
 End Function
 
 Dim utf8[1000] As Byte
 Dim sjis As *Byte
 Dim hF As HANDLE
 Dim r As DWord
 hF = CreateFile("utf8.txt", GENERIC_READ,0,ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,0)
 ReadFile(hF, utf8,1000, Varptr(r),ByVal 0)
 sjis = utf8ToSjis(utf8)
 Print MakeStr(sjis)
 CloseHandle(hF)
 free(sjis)
 Input sjis



-WinAPIを使うべき処理
ShiftJIS→Unicode(UCS-2)変換
MultiByteToWideChar関数を使う。
[[MultiByteToWideChar(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/MultiByteToWideChar.html]]に詳しい。誰かコピーして。

Unicode(UCS-2)→ShiftJIS変換
WideCharToMultiByte関数を使う。
[[WideCharToMultiByte(外部ページ)>http://yokohama.cool.ne.jp/chokuto/urawaza/api/WideCharToMultiByte.html]]に詳しい。誰かコピーして。

-WinAPIを使わなくてもプログラム側で簡単に変換できるもの
ShiftJIS→EUC-JP変換
ShiftJISとEUC-JPはエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。

 Sub SJIS2EUCJP(src As *Byte,dst As *Byte)
     Dim is=0 As Long,id=0 As Long
     Do
         If src[is]=0 then
             dst[id]=0
             Exit Do
         Else If src[is]<&H80 then
             dst[id]=src[is]
             is++
             id++
         Else If (&HA1<=src[is]) And (src[is]<=&HDF) then
             dst[id]=&H8E
             dst[id+1]=src[is]
             is++
             id+=2
         Else
             Dim a As Long,b As Long
             b=src[is+1]-&H3F
             If b=0 then
                 dst[id]=0
                 Exit Do
             End If
             If b>63 then b--
             a=src[is]*2-&H101
             If a>62 then a-=128
             If b>94 then
                 b-=94
                 a++
             End If
             If (a<1) Or (94<a) Or (b<1) Or (94<b) then
                 dst[id]=0
                 Exit Do
             End If
             dst[id]=a+&HA0
             dst[id+1]=b+&HA0
             is+=2
             id+=2
         End If
     Loop
 End Sub

EUC-JP→ShiftJIS変換
 Sub EUCJP2SJIS(src As *Byte,dst As *Byte)
     Dim is=0 As Long,id=0 As Long
     Do
         If src[is]=0 then
             dst[id]=0
             Exit Do
         Else If src[is]<&H80 then
             dst[id]=src[is]
             is++
             id++
         Else If src[is]=&H8E then
             dst[id]=src[is+1]
             is+=2
             id++
         Else If src[is]=&H8F then'JIS X 0212-1990文字集合だが、Shift-JISには無いため?で代用する。
             dst[id]=Asc("?")
             is+=3
             id++
         Else
             Dim a As Long,b As Long
             a=src[is]-&HA0
             b=src[is+1]-&HA0
             If (a<1) Or (94<a) Or (b<1) Or (94<b) then
                 dst[id]=0
                 Exit Do
             End If
             a--
             If a and 1 then
                 a--
                 b+=94
             End If
             a=(a>>1)+&H81
             If b>63 then b++
             If a>&H9F then a+=64
             dst[id]=a
             dst[id+1]=b+&H3F
             is+=2
             id+=2
         End If
     Loop
 End Sub

Unicode(UCS-2)→Unicode(UTF-8)変換
UCS-2とUTF-8はエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。
 Sub UCS22UTF8(src As *Byte,dst As *Byte)
     Dim is=2 As Long,id=0 As Long
     Dim fBE As Long
     If (src[0]=&HFF) and (src[1]=&HFE) then'BOM
         fBE=0
     Else If (src[0]=&HFE) and (src[1]=&HFF) then
         fBE=1
     Else'default=Big Endian(RFC 2781)
         fBE=1
         is-=2
     End If
     Do
         If src[is]=0 And src[is+1]=0 then
             dst[id]=0
             Exit Do
         Else
             Dim c As Long
             c=src[is+(1-fBE)] As Long*256+src[is+fBE]
             If c<&H80 then
                 dst[id]=c
                 id++
             Else If c<&H800 then
                 dst[id]=&HC0 Or (c>>6)
                 dst[id+1]=&H80 Or (c and &H3F)
                 id+=2
             Else If c<&H10000 then
                 dst[id]=&HE0 Or (c>>12)
                 dst[id+1]=&H80 Or ((c>>6) and &H3F)
                 dst[id+2]=&H80 Or (c and &H3F)
                 id+=3
             Else
                 dst[id]=0
                 Exit Do
             End If
             is+=2
         End If
     Loop
 End Sub

Unicode(UTF-8)→Unicode(UCS-2)
 Sub UTF82UCS2(src As *Byte,dst As *Byte)
     Dim is=0 As Long,id=2 As Long
     If (src[0]=&HEF) And (src[1]=&HBB) And (src[2]=&HBF) then is+=3
     dst[0]=&HFE
     dst[1]=&HFF
     Do
         If src[is]=0 then
             dst[id]=0
             Exit Do
         Else
             Dim c As Long
             If src[is]<&H80 then
                 c=src[is]
                 is++
             Else If src[is]<&HE0 then
                 If src[is+1]=0 then
                     dst[id]=0
                     Exit Do
                 Else
                     c=((src[is] And &H1F)<<6) Or (src[is+1] And &H3F)
                     is+=2
                 End If
             Else If src[is]<&HF0 then
                 If src[is+1]=0 then
                     dst[id]=0
                     Exit Do
                 Else If src[is+2]=0 then
                     dst[id]=0
                     Exit Do
                 Else
                     c=((src[is] And &H0F)<<12) Or ((src[is+1] And &H3F)<<6) Or (src[is+2] And &H3F)
                     is+=3
                 End If
             Else If src[is]<&HF8 then
                 If src[is+1]=0 then
                     dst[id]=0
                     Exit Do
                 Else If src[is+2]=0 then
                     dst[id]=0
                     Exit Do
                 Else If src[is+3]=0 then
                     dst[id]=0
                     Exit Do
                 Else
                     c=((src[is] And &H03)<<18) Or ((src[is+1] And &H3F)<<12) Or ((src[is+2] And &H3F)<<6) Or (src[is+3] And &H3F)
                     is+=4
                 End If
             Else
                 dst[id]=0
                 Exit Do
             End If
             If c>&H10000 then'UCS2には拡張領域が無いため表現できない。
                 dst[id]=0
                 dst[id+1]=Asc("?")
                 id+=2
             Else
                 dst[id]=c>>8
                 dst[id+1]=c And &HFF
                 id+=2
             End If
         End If
     Loop
 End Sub


-nkf32を使って
nkf32は文字コードの各種変換を行う便利ライブラリ。
DLLとして提供されていて、使い易くなってる。
ABも実はこれを使っている。SubOperationフォルダーを参照。

使い方は結構簡単で、例えば次のようにするだけで、何らかの文字コードのファイルをEUCに変換してくれる。
#asciiart(blockquote){
#N88BASIC

'宣言
Declare Sub GetNkfVersion Lib "Nkf32.dll" (verStr As *Byte)
Declare Function SetNkfOption Lib "Nkf32.dll" (optStr As *Byte) As Long
Declare Sub NkfConvert Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)
Declare Sub ToHankaku Lib "Nkf32.dll" (inStr As *Byte)
Declare Sub ToZenkakuKana Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)
Declare Sub ToMime Lib "Nkf32.dll" Alias "EncodeSubject" (outStr As *Byte, inStr As *Byte)
Declare Sub EncodeSubject Lib "Nkf32.dll" (outStr As *Byte, inStr As *Byte)

Dim vstr[478] As Byte
Dim in As *Byte
Dim out As *Byte
Dim size As DWord
GetNkfVersion(vstr)
Print "nkf32のバージョン→" & MakeStr(vstr)
in = LoadFile(VarPtr(size))
If in And size Then Print "読み取りOK" Else End
If SetNkfOption("-e") = 0 Then Print "EUCに変換します"
out = calloc(size+1)
NkfConvert(out, in)
SaveFile(out, lstrlen(out))
Print "おわり"
End

'ファイルを開いて中身を返す
Function LoadFile(sss As *Long) As *Byte
	Dim ofn As OPENFILENAME
	Dim buffer[333] As Byte
	ofn.lStructSize=SizeOf(OPENFILENAME)
	ofn.hwndOwner=GetActiveWindow()
	ofn.lpstrFilter=Ex"テキスト ファイル(*.txt)\0*.txt\0すべてのファイル(*.*)\0*\0"
	ofn.nFilterIndex=1
	ofn.lpstrFile=buffer
	ofn.nMaxFile=333
	ofn.lpstrTitle="ファイルを開く"
	ofn.Flags=OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST
	If GetOpenFileName(ofn)=0 Then Exit Function

	'Open
	Dim hF As HANDLE
	Dim r As Dword
	Dim x As *Byte
	hF = CreateFile( buffer, GENERIC_READ, 0, ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0)
	SetDWord(sss, GetFileSize(hF, 0))
	x = calloc(GetDword(sss)+1)
	ReadFile(hF, x, GetDword(sss), sss, ByVal 0)
	CloseHandle(hF)
	LoadFile = x
End Function

'oooの内容を保存
Function SaveFile(ooo As *Byte, sss As Long) As Long
	Dim ofn As OPENFILENAME
	Dim buffer[333] As Byte
	ofn.lStructSize=SizeOf(OPENFILENAME)
	ofn.hwndOwner=GetActiveWindow()
	ofn.lpstrFilter=Ex"テキスト ファイル(*.txt)\0*.txt\0すべてのファイル(*.*)\0*\0"
	ofn.nFilterIndex=1
	ofn.lpstrFile=buffer
	ofn.nMaxFile=333
	ofn.lpstrTitle="ファイルを保存"
	ofn.Flags=OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST
	If GetSaveFileName(ofn)=0 Then Exit Function

	'Open
	Dim hF As HANDLE
	Dim w As Dword
	hF = CreateFile(buffer, GENERIC_WRITE, 0, ByVal 0, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, 0)
	WriteFile(hF, ooo, sss, VarPtr(w), ByVal 0)
	CloseHandle(hF)
	SaveFile = 44
End Function
}