Added code

This commit is contained in:
2014-02-15 20:04:49 +00:00
parent 92c0169327
commit ba4ba01606
4 changed files with 255 additions and 0 deletions

20
DedupStat.sln Normal file
View File

@@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DedupStat", "DedupStat\DedupStat.csproj", "{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x86 = Debug|x86
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Debug|x86.ActiveCfg = Debug|x86
{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Debug|x86.Build.0 = Debug|x86
{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Release|x86.ActiveCfg = Release|x86
{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Release|x86.Build.0 = Release|x86
EndGlobalSection
GlobalSection(MonoDevelopProperties) = preSolution
StartupItem = DedupStat\DedupStat.csproj
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,41 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- MSBuild project for the DedupStat executable. -->
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Settings shared by every configuration. Configuration and Platform fall back to
     Debug and x86 when the caller does not set them. -->
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x86</Platform>
<ProductVersion>10.0.0</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>DedupStat</RootNamespace>
<AssemblyName>DedupStat</AssemblyName>
</PropertyGroup>
<!-- Debug|x86: unoptimized build with full debug symbols and the DEBUG constant, written to bin\Debug. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug</OutputPath>
<DefineConstants>DEBUG;</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<!-- NOTE(review): Externalconsole looks like an IDE-specific (MonoDevelop) run setting; confirm before relying on it. -->
<Externalconsole>true</Externalconsole>
<PlatformTarget>x86</PlatformTarget>
</PropertyGroup>
<!-- Release|x86: optimized build written to bin\Release. DebugType is still "full" here and no
     DEBUG constant is defined. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<DebugType>full</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release</OutputPath>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Externalconsole>true</Externalconsole>
<PlatformTarget>x86</PlatformTarget>
</PropertyGroup>
<!-- Only the System assembly is referenced explicitly, so source files must not depend on
     types from other assemblies unless a reference is added here. -->
<ItemGroup>
<Reference Include="System" />
</ItemGroup>
<!-- Source files compiled into the assembly. -->
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<!-- Pulls in the C# build targets (compile, build, clean, ...) from the MSBuild installation. -->
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
</Project>

172
DedupStat/Program.cs Normal file
View File

@@ -0,0 +1,172 @@
/*******************************************************************************************
DedupStat - Shows an estimation of deduplication advantages for specified block size.
Copyright (C) 2014 Natalia Portillo
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*******************************************************************************************/
using System;
using System.IO;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
namespace DedupStat
{
    /// <summary>
    /// Console tool that splits every file under a folder into fixed-size blocks, hashes each block
    /// with SHA-1 and reports how many blocks are unique and how many are repeats.
    /// </summary>
    class MainClass
    {
        /// <summary>How many times each distinct block has been seen, keyed by the block's SHA-1 in hexadecimal.</summary>
        static Dictionary<string, ulong> hashes;

        /// <summary>Files found under the scanned folder; after the scan only the files that were read completely remain.</summary>
        static List<string> files;

        /// <summary>
        /// Entry point. Expects exactly two arguments: the block size in bytes (a non-zero multiple of 512
        /// no larger than <see cref="Int32.MaxValue"/>) and the path of an existing folder. Any other input
        /// prints the usage text and returns without scanning.
        /// </summary>
        /// <param name="args">Command-line arguments: <c>block_size</c> followed by <c>path</c>.</param>
        public static void Main(string[] args)
        {
            UInt32 blocksize;
            // Diagnostic switch for per-file details; there is no command-line option that sets it.
            bool verbose = false;

            // A zero block size would divide by zero and never read anything, and a size above
            // Int32.MaxValue cannot be used as an array length or a Stream.Read count.
            if (args.Length != 2 ||
                !UInt32.TryParse(args[0], out blocksize) ||
                blocksize == 0 ||
                blocksize > Int32.MaxValue ||
                blocksize % 512 != 0 ||
                !Directory.Exists(args[1]))
            {
                ShowHelp();
                return;
            }

            hashes = new Dictionary<string, ulong>();
            ulong blocks = 0;
            ulong overhead = 0;
            ulong totalsize = 0;

            ShowBanner();

            // UTC is used so a local clock change (e.g. daylight saving) cannot distort the elapsed time.
            DateTime start = DateTime.UtcNow;

            Console.WriteLine("Searching files...");
            files = new List<string>(Directory.EnumerateFiles(args[1], "*", SearchOption.AllDirectories));
            Console.WriteLine("{0} files found.", files.Count);
            Console.WriteLine("Counting {0} bytes sized blocks for found files.", blocksize);

            List<string> wrongfiles = new List<string>();
            List<string> readfiles = new List<string>(files.Count);

            // One buffer and one hasher are reused for every block of every file.
            byte[] buffer = new byte[blocksize];

            using (SHA1 sha1 = SHA1.Create())
            {
                foreach (string filePath in files)
                {
                    if (!File.Exists(filePath))
                    {
                        wrongfiles.Add(filePath);
                        continue;
                    }

                    int progressWidth = 0;

                    try
                    {
                        FileInfo fi = new FileInfo(filePath);
                        // Integer ceiling division; avoids the precision loss of going through double.
                        long fileBlocks = (fi.Length + blocksize - 1) / blocksize;
                        long fileOverhead = fileBlocks * blocksize - fi.Length;

                        if (verbose)
                        {
                            Console.WriteLine("File \"{0}\" is {1} bytes, uses {2} blocks of {3} bytes each, for a total of {4} bytes ({5} overhead bytes)",
                                              filePath, fi.Length, fileBlocks, blocksize, fileBlocks * blocksize, fileOverhead);
                            Console.WriteLine("Calculating block checksums");
                        }

                        // The file's hashes are staged here and merged into the global table only after
                        // the whole file was read, so a file that fails half-way leaves no trace in the
                        // statistics.
                        Dictionary<string, ulong> fileHashes = new Dictionary<string, ulong>();
                        ulong fileBlocksRead = 0;
                        ulong fileBytesRead = 0;

                        using (FileStream fs = File.OpenRead(filePath))
                        {
                            int read;
                            while ((read = ReadBlock(fs, buffer)) > 0)
                            {
                                fileBlocksRead++;
                                fileBytesRead += (ulong)read;

                                string progress = String.Format("Calculating hash of block {0}/{1}", fileBlocksRead, fileBlocks);
                                Console.Write("\r" + progress);
                                progressWidth = Math.Max(progressWidth, progress.Length);

                                string hash = CalculateSHA1(sha1, buffer);
                                ulong seenInFile;
                                fileHashes.TryGetValue(hash, out seenInFile); // stays 0 when the hash is new
                                fileHashes[hash] = seenInFile + 1;
                            }
                        }

                        ClearProgress(progressWidth);

                        ulong fileUniqueBlocks = 0;
                        ulong fileDuplicatedBlocks = 0;
                        foreach (KeyValuePair<string, ulong> entry in fileHashes)
                        {
                            ulong refCount;
                            if (hashes.TryGetValue(entry.Key, out refCount))
                            {
                                // Already known from an earlier file: every occurrence is a duplicate.
                                fileDuplicatedBlocks += entry.Value;
                            }
                            else
                            {
                                // First occurrence is unique, repeats inside this file are duplicates.
                                fileUniqueBlocks++;
                                fileDuplicatedBlocks += entry.Value - 1;
                            }
                            hashes[entry.Key] = refCount + entry.Value;
                        }

                        // Totals are taken from what was actually read, which keeps them consistent
                        // with the hash table even if the file size changed since it was listed.
                        blocks += fileBlocksRead;
                        totalsize += fileBytesRead;
                        overhead += fileBlocksRead * blocksize - fileBytesRead;
                        readfiles.Add(filePath);

                        if (verbose)
                            Console.WriteLine("{0} blocks, {1} unique, {2} duplicated", fileBlocksRead, fileUniqueBlocks, fileDuplicatedBlocks);
                    }
                    catch (Exception ex)
                    {
                        // Deliberately broad: any unreadable file is skipped so the scan can continue.
                        ClearProgress(progressWidth);
                        if (verbose)
                            Console.WriteLine("Exception \"{0}\" on file \"{1}\"", ex.Message, filePath);
                        wrongfiles.Add(filePath);
                    }
                }
            }

            // Keep only the files that contributed to the statistics.
            files = readfiles;

            double seconds = (DateTime.UtcNow - start).TotalSeconds;
            ulong uniqueBlocks = (ulong)hashes.Count;
            ulong duplicateBlocks = blocks - uniqueBlocks;
            ulong usedBytes = blocks * blocksize;
            // Guard the ratios so an empty scan prints 0 instead of NaN or Infinity.
            double uniquePercent = blocks == 0 ? 0.0 : uniqueBlocks * 100.0 / blocks;
            double duplicatePercent = blocks == 0 ? 0.0 : duplicateBlocks * 100.0 / blocks;
            // Floating-point division; dividing the ulong first would truncate to whole MiB.
            double throughput = seconds > 0 ? totalsize / 1048576.0 / seconds : 0.0;

            Console.WriteLine();
            Console.WriteLine("Summary:");
            Console.WriteLine("{0} files for a total of {1} bytes", files.Count, totalsize);
            if (wrongfiles.Count > 0)
                Console.WriteLine("{0} files could not be read and were skipped", wrongfiles.Count);
            Console.WriteLine("{0} bytes/block, for a total of {1} blocks used, using {2} bytes", blocksize, blocks, usedBytes);
            Console.WriteLine("{0} wasted bytes (should be {1}, difference is {2})", overhead, usedBytes - totalsize, usedBytes - totalsize - overhead);
            Console.WriteLine("{0} unique blocks, using {1} bytes, {2}%", uniqueBlocks, uniqueBlocks * blocksize, uniquePercent);
            Console.WriteLine("{0} duplicate blocks, using {1} bytes, {2}%", duplicateBlocks, duplicateBlocks * blocksize, duplicatePercent);
            Console.WriteLine("Took {0} seconds, approx. {1} Mb/sec", seconds, throughput);
        }

        /// <summary>Prints the program banner followed by the command-line usage.</summary>
        public static void ShowHelp()
        {
            ShowBanner();
            Console.WriteLine("Usage: dedupstat <block_size> <path>");
            Console.WriteLine("\t<block_size>\tBlock size in bytes, must be a non-zero multiple of 512");
            Console.WriteLine("\t<path>\tFolder path");
        }

        /// <summary>Prints the program name, copyright line and a blank line.</summary>
        private static void ShowBanner()
        {
            Console.WriteLine("DedupStat - Shows an estimation of deduplication advantages for specified block size.");
            Console.WriteLine("© 2014 Natalia Portillo");
            Console.WriteLine();
        }

        /// <summary>
        /// Fills <paramref name="buffer"/> with the next block of <paramref name="stream"/>. Keeps reading
        /// until the buffer is full or the stream ends, because a single read may return fewer bytes than
        /// requested. The unused tail of a short block is zeroed so bytes of the previous block never
        /// leak into its hash.
        /// </summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="buffer">Destination buffer; its length is the block size.</param>
        /// <returns>Number of bytes read from the stream; 0 when the stream is exhausted.</returns>
        private static int ReadBlock(Stream stream, byte[] buffer)
        {
            int filled = 0;
            while (filled < buffer.Length)
            {
                int read = stream.Read(buffer, filled, buffer.Length - filled);
                if (read <= 0)
                    break;
                filled += read;
            }

            if (filled < buffer.Length)
                Array.Clear(buffer, filled, buffer.Length - filled);

            return filled;
        }

        /// <summary>Blanks the in-place progress text of <paramref name="width"/> characters and returns the cursor to column 0.</summary>
        /// <param name="width">Length of the longest progress text written on the current line; 0 means nothing to clear.</param>
        private static void ClearProgress(int width)
        {
            if (width > 0)
                Console.Write("\r" + new string(' ', width) + "\r");
        }

        /// <summary>Computes the SHA-1 of <paramref name="block"/> and returns it as uppercase hexadecimal text.</summary>
        /// <param name="sha1">Hasher to use; it is reused between calls instead of being created per block.</param>
        /// <param name="block">Block contents to hash.</param>
        /// <returns>The 40-character uppercase hexadecimal digest.</returns>
        private static string CalculateSHA1(SHA1 sha1, byte[] block)
        {
            byte[] hash = sha1.ComputeHash(block);
            // BitConverter yields "AA-BB-..."; dropping the dashes gives plain uppercase hex.
            return BitConverter.ToString(hash).Replace("-", "");
        }
    }
}

View File

@@ -0,0 +1,22 @@
using System.Reflection;
using System.Runtime.CompilerServices;
// Information about this assembly is defined by the following attributes.
// Change them to the values specific to your project.
// Only the title, company and copyright carry text here; the description, configuration,
// product, trademark and culture attributes are set to empty strings.
[assembly: AssemblyTitle("DedupStat")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Claunia.com")]
[assembly: AssemblyProduct("")]
[assembly: AssemblyCopyright("© Claunia.com")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
// The form "{Major}.{Minor}.*" will automatically update the build and revision,
// and "{Major}.{Minor}.{Build}.*" will update just the revision.
// The value below uses the first form: major 1, minor 0, build and revision generated.
[assembly: AssemblyVersion("1.0.*")]
// The following attributes are used to specify the signing key for the assembly,
// if desired. See the Mono documentation for more information about signing.
// Both are commented out, so no signing attributes are applied by this file.
//[assembly: AssemblyDelaySign(false)]
//[assembly: AssemblyKeyFile("")]