Added code
This commit is contained in:
20
DedupStat.sln
Normal file
20
DedupStat.sln
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DedupStat", "DedupStat\DedupStat.csproj", "{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Debug|x86.ActiveCfg = Debug|x86
|
||||
{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Debug|x86.Build.0 = Debug|x86
|
||||
{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Release|x86.ActiveCfg = Release|x86
|
||||
{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Release|x86.Build.0 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(MonoDevelopProperties) = preSolution
|
||||
StartupItem = DedupStat\DedupStat.csproj
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
41
DedupStat/DedupStat.csproj
Normal file
41
DedupStat/DedupStat.csproj
Normal file
@@ -0,0 +1,41 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
|
||||
<ProductVersion>10.0.0</ProductVersion>
|
||||
<SchemaVersion>2.0</SchemaVersion>
|
||||
<ProjectGuid>{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<RootNamespace>DedupStat</RootNamespace>
|
||||
<AssemblyName>DedupStat</AssemblyName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug</OutputPath>
|
||||
<DefineConstants>DEBUG;</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<Externalconsole>true</Externalconsole>
|
||||
<PlatformTarget>x86</PlatformTarget>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release</OutputPath>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<Externalconsole>true</Externalconsole>
|
||||
<PlatformTarget>x86</PlatformTarget>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="System" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="Program.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
|
||||
</Project>
|
||||
172
DedupStat/Program.cs
Normal file
172
DedupStat/Program.cs
Normal file
@@ -0,0 +1,172 @@
|
||||
/*******************************************************************************************
|
||||
DedupStat - Shows an estimation of deduplication advantages for specified block size.
|
||||
Copyright (C) 2014 Natalia Portillo
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*******************************************************************************************/
|
||||
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Collections.Generic;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
|
||||
namespace DedupStat
|
||||
{
|
||||
/// <summary>
/// Command line tool that walks a folder, splits every file into fixed-size
/// blocks, hashes each block with SHA-1 and reports how many blocks are
/// unique versus duplicated.
/// </summary>
class MainClass
{
    /// <summary>Reference count of every distinct block seen so far, keyed by its upper-case hex SHA-1.</summary>
    static Dictionary<string, ulong> hashes;

    /// <summary>Files found under the requested path; after processing, only the files that were read successfully.</summary>
    static List<string> files;

    /// <summary>
    /// Entry point. Expects exactly two arguments: the block size in bytes
    /// (a non-zero multiple of 512 that fits in an <see cref="int"/>) and an
    /// existing folder path. Prints the usage text when the arguments are invalid.
    /// </summary>
    /// <param name="args">Command line arguments: <c>block_size</c> and <c>path</c>.</param>
    public static void Main(string[] args)
    {
        UInt32 blocksize;
        // Per-file diagnostics exist but there is currently no command line switch to enable them.
        bool verbose = false;

        // Zero would divide by zero below, and anything above int.MaxValue
        // cannot be used as a Stream.Read count, so both are rejected up front.
        if (args.Length != 2 ||
            !UInt32.TryParse(args[0], out blocksize) ||
            blocksize == 0 ||
            blocksize > int.MaxValue ||
            blocksize % 512 != 0 ||
            !Directory.Exists(args[1]))
        {
            ShowHelp();
            return;
        }

        hashes = new Dictionary<string, ulong>();
        ulong blocks = 0;
        ulong overhead = 0;
        ulong totalsize = 0;
        int skipped = 0;

        Console.WriteLine("DedupStat - Shows an estimation of deduplication advantages for specified block size.");
        Console.WriteLine("© 2014 Natalia Portillo");
        Console.WriteLine();

        // Stopwatch is monotonic; wall-clock differences can jump with clock adjustments.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

        Console.WriteLine("Searching files...");
        try
        {
            // The List constructor drains the lazy enumeration, so access errors surface here.
            files = new List<string>(Directory.EnumerateFiles(args[1], "*", SearchOption.AllDirectories));
        }
        catch (UnauthorizedAccessException ex)
        {
            Console.WriteLine("Cannot enumerate files: {0}", ex.Message);
            return;
        }
        catch (IOException ex)
        {
            Console.WriteLine("Cannot enumerate files: {0}", ex.Message);
            return;
        }

        Console.WriteLine("{0} files found.", files.Count);
        Console.WriteLine("Counting {0} bytes sized blocks for found files.", blocksize);

        List<string> processed = new List<string>(files.Count);
        // One buffer is reused for every block; ReadBlock zero-pads short blocks
        // so no bytes from a previous block ever leak into a hash.
        byte[] buffer = new byte[blocksize];

        foreach (string filePath in files)
        {
            if (!File.Exists(filePath))
            {
                skipped++;
                continue;
            }

            try
            {
                FileInfo fi = new FileInfo(filePath);
                // Integer ceiling division; used for progress display and the verbose estimate only.
                long expectedBlocks = (fi.Length + blocksize - 1) / blocksize;

                if (verbose)
                {
                    Console.WriteLine("File \"{0}\" is {1} bytes, uses {2} blocks of {3} bytes each, for a total of {4} bytes ({5} overhead bytes)",
                                      filePath, fi.Length, expectedBlocks, blocksize, expectedBlocks * blocksize, expectedBlocks * blocksize - fi.Length);
                    Console.WriteLine("Calculating block checksums");
                }

                ulong fileBlocks;
                ulong fileBytes;
                Dictionary<string, ulong> fileHashes = HashFile(filePath, buffer, expectedBlocks, out fileBlocks, out fileBytes);
                Console.Write("\r ");

                // Totals are only touched once the whole file has been read, so a
                // file that fails half-way contributes nothing to the statistics.
                ulong fileUniqueBlocks = MergeHashes(fileHashes);
                blocks += fileBlocks;
                totalsize += fileBytes;
                overhead += fileBlocks * blocksize - fileBytes;
                processed.Add(filePath);

                if (verbose)
                {
                    Console.WriteLine("{0} blocks, {1} unique, {2} duplicated", fileBlocks, fileUniqueBlocks, fileBlocks - fileUniqueBlocks);
                }
            }
            catch (Exception Ex)
            {
                // Best effort by design: an unreadable file is skipped, not fatal.
                if (verbose)
                {
                    Console.WriteLine("Exception \"{0}\" on file \"{1}\"", Ex.Message, filePath);
                }
                skipped++;
            }
        }

        files = processed;
        timer.Stop();

        ulong usedBytes = blocks * blocksize;
        ulong uniqueBlocks = (ulong)hashes.Count;
        // Every counted block was merged into hashes, so blocks >= uniqueBlocks and this cannot underflow.
        ulong duplicateBlocks = blocks - uniqueBlocks;
        double uniquePercent = blocks == 0 ? 0.0 : uniqueBlocks * 100.0 / blocks;
        double duplicatePercent = blocks == 0 ? 0.0 : duplicateBlocks * 100.0 / blocks;
        double seconds = timer.Elapsed.TotalSeconds;
        double throughput = seconds > 0 ? totalsize / 1048576.0 / seconds : 0.0;

        Console.WriteLine();
        Console.WriteLine("Summary:");
        Console.WriteLine("{0} files for a total of {1} bytes", files.Count, totalsize);
        if (skipped > 0)
        {
            Console.WriteLine("{0} files could not be read and were skipped", skipped);
        }
        Console.WriteLine("{0} bytes/block, for a total of {1} blocks used, using {2} bytes", blocksize, blocks, usedBytes);
        Console.WriteLine("{0} wasted bytes (should be {1}, difference is {2})", overhead, usedBytes - totalsize, usedBytes - totalsize - overhead);
        Console.WriteLine("{0} unique blocks, using {1} bytes, {2}%", uniqueBlocks, uniqueBlocks * blocksize, uniquePercent);
        Console.WriteLine("{0} duplicate blocks, using {1} bytes, {2}%", duplicateBlocks, duplicateBlocks * blocksize, duplicatePercent);
        Console.WriteLine("Took {0} seconds, approx. {1} Mb/sec", seconds, throughput);
    }

    /// <summary>Prints the program banner and the command line usage.</summary>
    public static void ShowHelp()
    {
        Console.WriteLine("DedupStat - Shows an estimation of deduplication advantages for specified block size.");
        Console.WriteLine("© 2014 Natalia Portillo");
        Console.WriteLine();
        Console.WriteLine("Usage: dedupstat <block_size> <path>");
        Console.WriteLine("\t<block_size>\tBlock size in bytes, must be multiple of 512");
        Console.WriteLine("\t<path>\tFolder path");
    }

    /// <summary>
    /// Reads one file block by block and counts how often each block hash
    /// occurs inside that file. The stream is always closed, even on failure.
    /// </summary>
    /// <param name="filePath">File to read.</param>
    /// <param name="buffer">Scratch buffer whose length is the block size.</param>
    /// <param name="expectedBlocks">Block count shown as the progress total.</param>
    /// <param name="blockCount">Number of blocks actually read.</param>
    /// <param name="byteCount">Number of bytes actually read.</param>
    /// <returns>Occurrence count per block hash for this file only.</returns>
    private static Dictionary<string, ulong> HashFile(string filePath, byte[] buffer, long expectedBlocks,
                                                      out ulong blockCount, out ulong byteCount)
    {
        Dictionary<string, ulong> fileHashes = new Dictionary<string, ulong>();
        blockCount = 0;
        byteCount = 0;

        using (FileStream fs = File.OpenRead(filePath))
        {
            int read;
            while ((read = ReadBlock(fs, buffer)) > 0)
            {
                blockCount++;
                byteCount += (ulong)read;
                Console.Write("\rCalculating hash of block {0}/{1}", blockCount, expectedBlocks);

                string hash = CalculateSHA1(buffer);
                ulong seen;
                // TryGetValue leaves 'seen' at 0 for a hash not met before.
                fileHashes.TryGetValue(hash, out seen);
                fileHashes[hash] = seen + 1;
            }
        }

        return fileHashes;
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> from <paramref name="stream"/>, retrying
    /// short reads until the buffer is full or the stream ends. A short final
    /// block is padded with zeros so its hash depends only on its own data.
    /// </summary>
    /// <returns>Number of bytes read from the stream; 0 at end of stream.</returns>
    private static int ReadBlock(Stream stream, byte[] buffer)
    {
        int filled = 0;
        while (filled < buffer.Length)
        {
            int got = stream.Read(buffer, filled, buffer.Length - filled);
            if (got <= 0)
            {
                break;
            }
            filled += got;
        }

        if (filled > 0 && filled < buffer.Length)
        {
            Array.Clear(buffer, filled, buffer.Length - filled);
        }

        return filled;
    }

    /// <summary>Adds the per-file hash counts to the global <c>hashes</c> table.</summary>
    /// <param name="fileHashes">Occurrence count per block hash for one file.</param>
    /// <returns>How many of the file's hashes had never been seen before.</returns>
    private static ulong MergeHashes(Dictionary<string, ulong> fileHashes)
    {
        ulong newlySeen = 0;
        foreach (KeyValuePair<string, ulong> entry in fileHashes)
        {
            ulong refCount;
            if (!hashes.TryGetValue(entry.Key, out refCount))
            {
                newlySeen++;
            }
            hashes[entry.Key] = refCount + entry.Value;
        }

        return newlySeen;
    }

    /// <summary>Computes the SHA-1 of <paramref name="block"/> as an upper-case hexadecimal string.</summary>
    /// <param name="block">Bytes to hash; the whole array is hashed.</param>
    /// <returns>Two upper-case hex digits per hash byte.</returns>
    private static string CalculateSHA1(byte[] block)
    {
        using (SHA1Managed sha1 = new SHA1Managed())
        {
            byte[] hash = sha1.ComputeHash(block);
            StringBuilder formatted = new StringBuilder(2 * hash.Length);
            foreach (byte b in hash)
            {
                formatted.Append(b.ToString("X2"));
            }

            return formatted.ToString();
        }
    }
}
|
||||
}
|
||||
22
DedupStat/Properties/AssemblyInfo.cs
Normal file
22
DedupStat/Properties/AssemblyInfo.cs
Normal file
@@ -0,0 +1,22 @@
|
||||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
// Assembly-level metadata for the DedupStat assembly.
// Only title, company, copyright and version carry values; description,
// configuration, product, trademark and culture are left as empty strings.
// Information about this assembly is defined by the following attributes.
|
||||
// Change them to the values specific to your project.
|
||||
[assembly: AssemblyTitle("DedupStat")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("Claunia.com")]
|
||||
[assembly: AssemblyProduct("")]
|
||||
[assembly: AssemblyCopyright("© Claunia.com")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
|
||||
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
|
||||
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
|
||||
// Here major is 1 and minor is 0; the wildcard leaves build and revision to be generated.
[assembly: AssemblyVersion("1.0.*")]
|
||||
// The following attributes are used to specify the signing key for the assembly,
|
||||
// if desired. See the Mono documentation for more information about signing.
|
||||
// Both signing attributes below are commented out, so no signing key is configured here.
//[assembly: AssemblyDelaySign(false)]
|
||||
//[assembly: AssemblyKeyFile("")]
|
||||
|
||||
Reference in New Issue
Block a user