「文字コードの変換」の編集履歴(バックアップ)一覧に戻る

文字コードの変換 - (2010/01/25 (月) 06:28:31) の編集履歴(バックアップ)


  • UTF82SJIS

#N88BASIC

Function utf8ToSjis(utf8 As *Byte) As *Byte
	Dim utf16 As *Word
	Dim wlen As Long
	Dim sjis As *Byte

	utf8ToSjis = NULL
	If utf8 = NULL Then
		Exit Function
	End If
	
	'=============================='
	'	UTF8=>UTF16変換	    '
	'=============================='
	'	まずは、wlenの長さをとる
   wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0)
    If wlen = 0 Then
			Exit Function
	End If

	'	utf16の領域確保
	utf16 = calloc(wlen + 1)
	If utf16 = NULL Then
		Exit Function
	End If

	'	utf16変換
	If MultiByteToWideChar(CP_UTF8, 0, utf8, -1, utf16, wlen) <= 0 Then
		free(utf16)
		Exit Function
	End If

	'=============================='
	'	UTF16=>ShiftJIS変換   '
	'=============================='
	'	まずは、長さを取得
	Dim mlen As Long
	
	mlen = WideCharToMultiByte(CP_ACP,0,utf16,-1,NULL,0," ",NULL)
	If mlen = 0 Then
		free(utf16)
		Exit Function
	End If

	'	領域確保
	sjis = calloc(mlen + 1)
	If sjis = NULL Then
		free(utf16)
		Exit Function
	End If

	'	utf16変換
	If WideCharToMultiByte(CP_ACP,0,utf16,-1,sjis,mlen," ",NULL) <= 0 Then
		free(utf16)
		free(sjis)
		Exit Function
	End If

	free(utf16)
	utf8ToSjis = sjis
End Function

Dim utf8[1000] As Byte
Dim sjis As *Byte
Dim hF As HANDLE
Dim r As DWord
hF = CreateFile("utf8.txt", GENERIC_READ,0,ByVal 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,0)
ReadFile(hF, utf8,1000, Varptr(r),ByVal 0)
sjis = utf8ToSjis(utf8)
Print MakeStr(sjis)
CloseHandle(hF)
free(sjis)
Input sjis




  • WinAPIを使うべき処理
ShiftJIS→Unicode(UCS-2)変換
MultiByteToWideChar関数を使う。
MultiByteToWideChar(外部ページ) に詳しい。誰かコピーして。

Unicode(UCS-2)→ShiftJIS変換
WideCharToMultiByte関数を使う。
WideCharToMultiByte(外部ページ) に詳しい。誰かコピーして。

  • WinAPIを使わなくてもプログラム側で簡単に変換できるもの
ShiftJIS→EUC-JP変換
ShiftJISとEUC-JPはエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。

Sub SJIS2EUCJP(src As *Byte,dst As *Byte)
    Dim is=0 As Long,id=0 As Long
    Do
        If src[is]=0 then
            dst[id]=0
            Exit Do
        Else If src[is]<&H80 then
            dst[id]=src[is]
            is++
            id++
        Else If (&HA1<=src[is]) And (src[is]<=&HDF) then
            dst[id]=&H8E
            dst[id+1]=src[is]
            is++
            id+=2
        Else
            Dim a As Long,b As Long
            b=src[is+1]-&H3F
            If b=0 then
                dst[id]=0
                Exit Do
            End If
            If b>63 then b--
            a=src[is]*2-&H101
            If a>62 then a-=128
            If b>94 then
                b-=94
                a++
            End If
            If (a<1) Or (94<a) Or (b<1) Or (94<b) then
                dst[id]=0
                Exit Do
            End If
            dst[id]=a+&HA0
            dst[id+1]=b+&HA0
            is+=2
            id+=2
        End If
    Loop
End Sub

EUC-JP→ShiftJIS変換
Sub EUCJP2SJIS(src As *Byte,dst As *Byte)
    Dim is=0 As Long,id=0 As Long
    Do
        If src[is]=0 then
            dst[id]=0
            Exit Do
        Else If src[is]<&H80 then
            dst[id]=src[is]
            is++
            id++
        Else If src[is]=&H8E then
            dst[id]=src[is+1]
            is+=2
            id++
        Else If src[is]=&H8F then'JIS X 0212-1990文字集合だが、Shift-JISには無いため?で代用する。
            dst[id]=Asc("?")
            is+=3
            id++
        Else
            Dim a As Long,b As Long
            a=src[is]-&HA0
            b=src[is+1]-&HA0
            If (a<1) Or (94<a) Or (b<1) Or (94<b) then
                dst[id]=0
                Exit Do
            End If
            a--
            If a and 1 then
                a--
                b+=94
            End If
            a=(a>>1)+&H81
            If b>63 then b++
            If a>&H9F then a+=64
            dst[id]=a
            dst[id+1]=b+&H3F
            is+=2
            id+=2
        End If
    Loop
End Sub

Unicode(UCS-2)→Unicode(UTF-8)変換
UCS-2とUTF-8はエンコードが違うだけで同じ符号体系を用いている。その為、この変換は比較的容易い。
Sub UCS22UTF8(src As *Byte,dst As *Byte)
    Dim is=2 As Long,id=0 As Long
    Dim fBE As Long
    If (src[0]=&HFF) and (src[1]=&HFE) then'BOM
        fBE=0
    Else If (src[0]=&HFE) and (src[1]=&HFF) then
        fBE=1
    Else'default=Big Endian(RFC 2781)
        fBE=1
        is-=2
    End If
    Do
        If src[is]=0 And src[is+1]=0 then
            dst[id]=0
            Exit Do
        Else
            Dim c As Long
            c=src[is+(1-fBE)] As Long*256+src[is+fBE]
            If c<&H80 then
                dst[id]=c
                id++
            Else If c<&H800 then
                dst[id]=&HC0 Or (c>>6)
                dst[id+1]=&H80 Or (c and &H3F)
                id+=2
            Else If c<&H10000 then
                dst[id]=&HE0 Or (c>>12)
                dst[id+1]=&H80 Or ((c>>6) and &H3F)
                dst[id+2]=&H80 Or (c and &H3F)
                id+=3
            Else
                dst[id]=0
                Exit Do
            End If
            is+=2
        End If
    Loop
End Sub

Unicode(UTF-8)→Unicode(UCS-2)
Sub UTF82UCS2(src As *Byte,dst As *Byte)
    Dim is=0 As Long,id=2 As Long
    If (src[0]=&HEF) And (src[1]=&HBB) And (src[2]=&HBF) then is+=3
    dst[0]=&HFE
    dst[1]=&HFF
    Do
        If src[is]=0 then
            dst[id]=0
            Exit Do
        Else
            Dim c As Long
            If src[is]<&H80 then
                c=src[is]
                is++
            Else If src[is]<&HE0 then
                If src[is+1]=0 then
                    dst[id]=0
                    Exit Do
                Else
                    c=((src[is] And &H1F)<<6) Or (src[is+1] And &H3F)
                    is+=2
                End If
            Else If src[is]<&HF0 then
                If src[is+1]=0 then
                    dst[id]=0
                    Exit Do
                Else If src[is+2]=0 then
                    dst[id]=0
                    Exit Do
                Else
                    c=((src[is] And &H0F)<<12) Or ((src[is+1] And &H3F)<<6) Or (src[is+2] And &H3F)
                    is+=3
                End If
            Else If src[is]<&HF8 then
                If src[is+1]=0 then
                    dst[id]=0
                    Exit Do
                Else If src[is+2]=0 then
                    dst[id]=0
                    Exit Do
                Else If src[is+3]=0 then
                    dst[id]=0
                    Exit Do
                Else
                    c=((src[is] And &H03)<<18) Or ((src[is+1] And &H3F)<<12) Or ((src[is+2] And &H3F)<<6) Or (src[is+3] And &H3F)
                    is+=4
                End If
            Else
                dst[id]=0
                Exit Do
            End If
            If c>&H10000 then'UCS2には拡張領域が無いため表現できない。
                dst[id]=0
                dst[id+1]=Asc("?")
                id+=2
            Else
                dst[id]=c>>8
                dst[id+1]=c And &HFF
                id+=2
            End If
        End If
    Loop
End Sub