#include <fcntl.h> // _O_WTEXT
#include <io.h> // _setmode
#include <stdio.h> // _fileno
#include <string.h> // memcmp
#include <tchar.h>
#include <map>
#include <set>
#include <string>
// Number of bytes filecmp() reads from each file on its first pass
// (presumably kept small so files that differ near the start are rejected
// without a large read).
#define FIRST_SIZE (4 * 1024)
// Number of bytes filecmp() reads on every later pass; also the size of each
// of its two static comparison buffers.
#define SECOND_SIZE (10 * 1024 * 1024)
// Type definitions
// Record kept for every file found by the directory scan.
struct FileInfo {
    std::wstring strPath; // path of the file as assembled by files()
    int iDelFlag;         // delete flag: copy of g_iDelFlag at scan time
    int iDupFlag;         // duplicate flag: set to 1 when the file matches an earlier one
};
// Scanned files keyed by the size reported by the directory scan; entries
// sharing a key are the candidates that get compared byte for byte.
typedef std::multimap<_fsize_t, FileInfo> mmfsfi;
// Set of paths already recorded, used to skip a path that is found twice.
typedef std::set<std::wstring> setstr;
// Function prototypes
// Recursively scans ptcDir and records every non-directory entry in the
// global containers. Returns 0 on success, -1 on error.
int files(const _TCHAR *ptcDir);
// Compares the contents of two files. Returns 0 when identical, 1 when
// different, -1 when a file cannot be opened or read.
int filecmp(const _TCHAR *ptcFile1, const _TCHAR *ptcFile2);
// Writes a `del "path"` line to finddup.log for every recorded file that has
// both its duplicate flag and its delete flag set.
int filedel(void);
// Global variables
// Every scanned file, keyed by size; filled by files(), read by _tmain() and filedel().
mmfsfi g_mmap;
// Every path that has been inserted into g_mmap; files() uses it to reject repeats.
setstr g_set;
// Set to 1 when the -d option is seen; files() copies it into each new FileInfo.
int g_iDelFlag = 0;
//==============================================================================
int _tmain(int argc, _TCHAR *argv[])
{
mmfsfi::iterator it;
mmfsfi::iterator itTarget;
FileInfo *pfi;
FileInfo *pfiTarget;
_TCHAR atcDir[_MAX_PATH];
size_t size;
int iCount;
int i;
// BOMなしUTF-16LE
_setmode(_fileno(stdout), _O_WTEXT);
_setmode(_fileno(stderr), _O_WTEXT);
if (argc < 2) {
_ftprintf(stderr, _T("usage: finddup [-d] dir [...]\n"));
return 1;
}
for (i = 1; i < argc; i++) {
if (argv[i][0] == _T('-')) {
if (argv[i][1] == _T('d')) {
g_iDelFlag = 1;
}
} else {
_tcscpy_s(atcDir, argv[i]);
size = _tcslen(atcDir);
if (0 < size && atcDir[size - 1] == _T('\\')) {
atcDir[size - 1] = _T('\0');
}
files(atcDir);
}
}
for (it = g_mmap.begin(); it != g_mmap.end(); it++) {
pfi = &(it->second);
if (pfi->iDupFlag) continue;
iCount = 0;
for (itTarget = it; ++itTarget != g_mmap.end(); ) {
if (itTarget->first != it->first) break;
pfiTarget = &(itTarget->second);
if (pfiTarget->iDupFlag) continue;
if (!filecmp(pfiTarget->strPath.c_str(), pfi->strPath.c_str())) {
pfiTarget->iDupFlag = 1;
if (iCount == 0) {
_tprintf(_T("\n%u KB (%u B)\n"),
(it->first + 1023) / 1024, it->first);
_tprintf(_T("%s\n"), pfi->strPath.c_str());
}
_tprintf(_T("%s\n"), pfiTarget->strPath.c_str());
iCount++;
}
}
}
if (g_iDelFlag) filedel();
return 0;
}
//------------------------------------------------------------------------------
int files(const _TCHAR *ptcDir)
{
std::pair<setstr::iterator, bool> pair;
struct _wfinddata_t fd;
FileInfo fi;
_TCHAR atcPath[_MAX_PATH];
intptr_t handle;
size_t sizeDir;
sizeDir = _tcslen(ptcDir);
if (_MAX_PATH <= sizeDir + 4) {
_ftprintf(stderr, _T("error: パスが長過ぎます。%u[%s]\n"), sizeDir, ptcDir);
return -1;
}
_stprintf_s(atcPath, _T("%s\\*.*"), ptcDir);
fi.iDelFlag = g_iDelFlag;
fi.iDupFlag = 0;
handle = _tfindfirst(atcPath, &fd);
if (handle == -1) {
_ftprintf(stderr, _T("error: _tfindfirst[%s]\n"), ptcDir);
return -1;
}
do {
if (_MAX_PATH <= sizeDir + 1 + _tcslen(fd.name)) {
_ftprintf(stderr, _T("error: パスが長過ぎます。[%s][%s]\n"),
ptcDir, fd.name);
continue;
}
_stprintf_s(atcPath, _T("%s\\%s"), ptcDir, fd.name);
if (fd.attrib & _A_SUBDIR) {
if (_tcscmp(fd.name, _T(".")) && _tcscmp(fd.name, _T(".."))) {
files(atcPath);
}
} else {
pair = g_set.insert(atcPath);
if (pair.second == true) {
fi.strPath = atcPath;
g_mmap.insert(mmfsfi::value_type(fd.size, fi));
}
}
} while (_tfindnext(handle, &fd) == 0);
_findclose(handle);
return 0;
}
//------------------------------------------------------------------------------
// Compares the contents of two files byte for byte.
//
// The first pass reads FIRST_SIZE bytes from each file and every later pass
// reads SECOND_SIZE bytes. When a difference is found after the first pass,
// the byte offset of the chunk being compared and both paths are written to
// stderr.
//
// Both read buffers are function-local statics, so they are shared by all
// calls (presumably because two SECOND_SIZE arrays are too large for the
// stack).
//
// ptcFile1, ptcFile2: paths of the files to compare.
// Returns 0 when the contents are identical, 1 when they differ, and -1 when
// either file cannot be opened or a read error occurs.
int filecmp(const _TCHAR *ptcFile1, const _TCHAR *ptcFile2)
{
    static char acBuf1[SECOND_SIZE];
    static char acBuf2[SECOND_SIZE];
    FILE *pFile1 = NULL;
    FILE *pFile2 = NULL;
    size_t sizeRead1;
    size_t sizeRead2;
    size_t sizeCount;
    // Offset of the chunk being compared. Kept in a 64-bit counter so it does
    // not wrap on builds where size_t is 32 bits wide.
    unsigned long long ullOffset;
    int iRetVal = -1;

    if (_tfopen_s(&pFile1, ptcFile1, _T("rb")) != 0) {
        _ftprintf(stderr, _T("%s を開けません\n"), ptcFile1);
        goto Exit;
    }
    if (_tfopen_s(&pFile2, ptcFile2, _T("rb")) != 0) {
        _ftprintf(stderr, _T("%s を開けません\n"), ptcFile2);
        goto Exit;
    }

    ullOffset = 0;
    for (sizeCount = FIRST_SIZE; ; sizeCount = SECOND_SIZE) {
        sizeRead1 = fread(acBuf1, 1, sizeCount, pFile1);
        if (ferror(pFile1) != 0) {
            _ftprintf(stderr, _T("error: fread[%s]\n"), ptcFile1);
            goto Exit;
        }
        sizeRead2 = fread(acBuf2, 1, sizeCount, pFile2);
        if (ferror(pFile2) != 0) {
            _ftprintf(stderr, _T("error: fread[%s]\n"), ptcFile2);
            goto Exit;
        }

        // One file ended before the other.
        if (sizeRead1 != sizeRead2) {
            iRetVal = 1;
            break;
        }
        if (memcmp(acBuf1, acBuf2, sizeRead1) != 0) {
            // Report only differences found after the first FIRST_SIZE bytes
            // matched. %llu matches the unsigned long long argument; the
            // previous %u did not match the size_t that was passed.
            if (sizeCount != FIRST_SIZE) {
                _ftprintf(stderr, _T("\n%llu\n[%s]\n[%s]\n"),
                          ullOffset, ptcFile1, ptcFile2);
            }
            iRetVal = 1;
            break;
        }
        // A short read with matching data means both files ended together.
        if (sizeRead1 != sizeCount) {
            iRetVal = 0;
            break;
        }
        ullOffset += sizeCount;
    }

Exit:
    if (pFile2) fclose(pFile2);
    if (pFile1) fclose(pFile1);
    return iRetVal;
}
//------------------------------------------------------------------------------
// Writes the deletion log.
//
// Creates (or overwrites) "finddup.log" in the current directory and writes
// one `del "path"` line for every entry of g_mmap that has both iDupFlag and
// iDelFlag set. The function itself does not delete any file.
//
// Returns 0 on success, -1 when the log cannot be created or when writing or
// closing it fails.
int filedel(void)
{
    mmfsfi::iterator it;
    FileInfo *pfi;
    FILE *pFile = NULL;
    int iRetVal = 0;

    if (_tfopen_s(&pFile, _T("finddup.log"), _T("wt,ccs=UNICODE"))) {
        _ftprintf(stderr, _T("ログファイルの作成に失敗しました\n"));
        return -1;
    }

    for (it = g_mmap.begin(); it != g_mmap.end(); ++it) {
        pfi = &(it->second);
        if (pfi->iDupFlag && pfi->iDelFlag) {
            // A negative result means the line was not written; stop instead
            // of reporting success for an incomplete log.
            if (_ftprintf(pFile, _T("del \"%s\"\n"), pfi->strPath.c_str()) < 0) {
                iRetVal = -1;
                break;
            }
        }
    }

    // fclose flushes buffered output, so a failure here also means data loss.
    if (fclose(pFile) != 0) {
        iRetVal = -1;
    }
    if (iRetVal != 0) {
        _ftprintf(stderr, _T("ログファイルの書き込みに失敗しました\n"));
    }
    return iRetVal;
}