diff --git a/DedupStat.sln b/DedupStat.sln
new file mode 100644
index 0000000..78b16a1
--- /dev/null
+++ b/DedupStat.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DedupStat", "DedupStat\DedupStat.csproj", "{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x86 = Debug|x86
+ Release|x86 = Release|x86
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Debug|x86.ActiveCfg = Debug|x86
+ {E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Debug|x86.Build.0 = Debug|x86
+ {E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Release|x86.ActiveCfg = Release|x86
+ {E7C119C0-43C3-4211-8CFC-5FDD3B383F16}.Release|x86.Build.0 = Release|x86
+ EndGlobalSection
+ GlobalSection(MonoDevelopProperties) = preSolution
+ StartupItem = DedupStat\DedupStat.csproj
+ EndGlobalSection
+EndGlobal
diff --git a/DedupStat/DedupStat.csproj b/DedupStat/DedupStat.csproj
new file mode 100644
index 0000000..d2bd9db
--- /dev/null
+++ b/DedupStat/DedupStat.csproj
@@ -0,0 +1,41 @@
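+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">x86</Platform>
+    <ProductVersion>10.0.0</ProductVersion>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{E7C119C0-43C3-4211-8CFC-5FDD3B383F16}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <RootNamespace>DedupStat</RootNamespace>
+    <AssemblyName>DedupStat</AssemblyName>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug</OutputPath>
+    <DefineConstants>DEBUG;</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+    <Externalconsole>true</Externalconsole>
+    <PlatformTarget>x86</PlatformTarget>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
+    <DebugType>full</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release</OutputPath>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+    <Externalconsole>true</Externalconsole>
+    <PlatformTarget>x86</PlatformTarget>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Program.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+  </ItemGroup>
+  <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
+</Project>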
\ No newline at end of file
diff --git a/DedupStat/Program.cs b/DedupStat/Program.cs
new file mode 100644
index 0000000..1fcba40
--- /dev/null
+++ b/DedupStat/Program.cs
@@ -0,0 +1,277 @@
+/*******************************************************************************************
+ DedupStat - Shows an estimation of deduplication advantages for specified block size.
+ Copyright (C) 2014 Natalia Portillo
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+*******************************************************************************************/
+
+using System;
+using System.IO;
+using System.Collections.Generic;
+using System.Security.Cryptography;
+using System.Text;
+
+namespace DedupStat
+{
+    /// <summary>
+    /// Console tool that walks a folder, splits every file into fixed-size blocks, hashes
+    /// each block with SHA-1 and reports how many of the blocks are unique.
+    /// </summary>
+    class MainClass
+    {
+        /// <summary>Times each block hash (upper-case hex SHA-1) has been seen so far.</summary>
+        static Dictionary<string, ulong> hashes;
+        /// <summary>Every file path found below the folder given on the command line.</summary>
+        static List<string> files;
+        /// <summary>Number of blocks hashed so far, over all files.</summary>
+        static ulong blocks;
+        /// <summary>Padding bytes needed to round the data read so far up to whole blocks.</summary>
+        static ulong overhead;
+        /// <summary>Number of file bytes actually read so far.</summary>
+        static ulong totalsize;
+        /// <summary>One hasher reused for every block; safe because the tool is single-threaded.</summary>
+        static readonly SHA1 hasher = SHA1.Create();
+
+        /// <summary>
+        /// Entry point. Expects <c>[-v] &lt;blocksize&gt; &lt;path&gt;</c>; prints the usage text and
+        /// returns when the arguments are invalid, otherwise hashes every file below the path
+        /// and prints the deduplication summary.
+        /// </summary>
+        /// <param name="args">Command-line arguments.</param>
+        public static void Main(string[] args)
+        {
+            UInt32 blocksize;
+            bool verbose;
+            string root;
+            if (!TryParseArguments(args, out verbose, out blocksize, out root))
+            {
+                ShowHelp();
+                return;
+            }
+
+            hashes = new Dictionary<string, ulong>();
+            blocks = 0;
+            overhead = 0;
+            totalsize = 0;
+            int processedFiles = 0;
+            int skippedFiles = 0;
+
+            Console.WriteLine("DedupStat - Shows an estimation of deduplication advantages for specified block size.");
+            Console.WriteLine("© 2014 Natalia Portillo");
+            Console.WriteLine();
+            // Stopwatch is monotonic; DateTime.Now can jump (DST change, clock sync) mid-run.
+            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
+            Console.WriteLine("Searching files...");
+            try
+            {
+                files = new List<string>(Directory.EnumerateFiles(root, "*", SearchOption.AllDirectories));
+            }
+            catch (Exception ex)
+            {
+                // An unreadable sub-folder aborts the recursive search; report it instead of crashing.
+                Console.WriteLine("Cannot search \"{0}\": {1}", root, ex.Message);
+                return;
+            }
+            Console.WriteLine("{0} files found.", files.Count);
+            Console.WriteLine("Counting {0} bytes sized blocks for found files.", blocksize);
+
+            // One buffer for the whole run; FillBlock zero-pads it, so no data leaks between blocks.
+            byte[] buffer = new byte[blocksize];
+            foreach (string filePath in files)
+            {
+                try
+                {
+                    if (!File.Exists(filePath))
+                    {
+                        skippedFiles++;
+                        continue;
+                    }
+
+                    HashFile(filePath, buffer, verbose);
+                    processedFiles++;
+                }
+                catch (Exception ex)
+                {
+                    // Best effort: one unreadable file must not abort the scan. Blocks hashed
+                    // before the failure stay counted, so the totals still match the hash table.
+                    skippedFiles++;
+                    if (verbose)
+                        Console.WriteLine("Exception \"{0}\" on file \"{1}\"", ex.Message, filePath);
+                }
+            }
+            timer.Stop();
+
+            double seconds = timer.Elapsed.TotalSeconds;
+            ulong unique = (ulong)hashes.Count;
+            ulong duplicated = blocks - unique;
+            ulong usedBytes = blocks * blocksize;
+
+            Console.WriteLine();
+            Console.WriteLine("Summary:");
+            Console.WriteLine("{0} files for a total of {1} bytes", processedFiles, totalsize);
+            if (skippedFiles > 0)
+                Console.WriteLine("{0} files could not be read and were skipped", skippedFiles);
+            Console.WriteLine("{0} bytes/block, for a total of {1} blocks used, using {2} bytes", blocksize, blocks, usedBytes);
+            Console.WriteLine("{0} wasted bytes (should be {1}, difference is {2})", overhead, usedBytes - totalsize, usedBytes - totalsize - overhead);
+            Console.WriteLine("{0} unique blocks, using {1} bytes, {2}%", unique, unique * blocksize, Percentage(unique, blocks));
+            Console.WriteLine("{0} duplicate blocks, using {1} bytes, {2}%", duplicated, duplicated * blocksize, Percentage(duplicated, blocks));
+            // Floating-point division keeps fractional megabytes; guard against a zero-length run.
+            Console.WriteLine("Took {0} seconds, approx. {1} Mb/sec", seconds, seconds > 0 ? totalsize / 1048576.0 / seconds : 0.0);
+        }
+
+        /// <summary>
+        /// Validates the command line: an optional <c>-v</c> or <c>--verbose</c> switch followed
+        /// by the block size and the folder to scan.
+        /// </summary>
+        /// <param name="args">Raw command-line arguments.</param>
+        /// <param name="verbose">Set to true when the verbose switch is present.</param>
+        /// <param name="blocksize">Parsed block size in bytes.</param>
+        /// <param name="root">Folder to scan.</param>
+        /// <returns>True when every argument is valid and the folder exists.</returns>
+        private static bool TryParseArguments(string[] args, out bool verbose, out UInt32 blocksize, out string root)
+        {
+            verbose = false;
+            blocksize = 0;
+            root = null;
+
+            int first = 0;
+            if (args.Length == 3 && (args[0] == "-v" || args[0] == "--verbose"))
+            {
+                verbose = true;
+                first = 1;
+            }
+            else if (args.Length != 2)
+                return false;
+
+            if (!UInt32.TryParse(args[first], out blocksize))
+                return false;
+            // Zero passes the "multiple of 512" test but can never hold data, and sizes above
+            // Int32.MaxValue cannot be used as an array length or Stream.Read count.
+            if (blocksize == 0 || blocksize % 512 != 0 || blocksize > Int32.MaxValue)
+                return false;
+
+            root = args[first + 1];
+            return Directory.Exists(root);
+        }
+
+        /// <summary>
+        /// Hashes one file block by block and folds the result into <see cref="hashes"/> and the
+        /// run counters.
+        /// </summary>
+        /// <param name="filePath">File to read.</param>
+        /// <param name="buffer">Scratch buffer whose length is the block size.</param>
+        /// <param name="verbose">True to print per-file details.</param>
+        private static void HashFile(string filePath, byte[] buffer, bool verbose)
+        {
+            long blockSize = buffer.Length;
+            long length = new FileInfo(filePath).Length;
+            // Integer ceiling; the floating-point form loses precision on very large files.
+            long fileBlocks = (length + blockSize - 1) / blockSize;
+            if (verbose)
+            {
+                Console.WriteLine("File \"{0}\" is {1} bytes, uses {2} blocks of {3} bytes each, for a total of {4} bytes ({5} overhead bytes)",
+                                  filePath, length, fileBlocks, blockSize, fileBlocks * blockSize, fileBlocks * blockSize - length);
+                Console.WriteLine("Calculating block checksums");
+            }
+
+            long count = 0;
+            long fileUniqueBlocks = 0;
+            long fileDuplicatedBlocks = 0;
+            // using guarantees the handle is released even when a read throws.
+            using (FileStream fs = File.OpenRead(filePath))
+            {
+                int filled;
+                while ((filled = FillBlock(fs, buffer)) > 0)
+                {
+                    count++;
+                    Console.Write("\rCalculating hash of block {0}/{1}", count, fileBlocks);
+                    string hash = CalculateSHA1(buffer);
+
+                    // Single lookup instead of ContainsKey + TryGetValue + Remove + Add.
+                    ulong refCount;
+                    if (hashes.TryGetValue(hash, out refCount))
+                    {
+                        hashes[hash] = refCount + 1;
+                        fileDuplicatedBlocks++;
+                    }
+                    else
+                    {
+                        hashes.Add(hash, 1);
+                        fileUniqueBlocks++;
+                    }
+
+                    blocks++;
+                    totalsize += (ulong)filled;
+                    overhead += (ulong)(buffer.Length - filled);
+                }
+            }
+
+            // Wipe the progress line so later output starts on a clean line.
+            Console.Write("\r{0}\r", new string(' ', 79));
+            if (verbose)
+                Console.WriteLine("{0} blocks, {1} unique, {2} duplicated", count, fileUniqueBlocks, fileDuplicatedBlocks);
+        }
+
+        /// <summary>
+        /// Fills <paramref name="buffer"/> from the stream, looping because a single Read may
+        /// return fewer bytes than requested, and zeroes whatever could not be filled.
+        /// </summary>
+        /// <param name="source">Stream to read from.</param>
+        /// <param name="buffer">Block buffer to fill completely.</param>
+        /// <returns>Bytes actually read; 0 at end of stream.</returns>
+        private static int FillBlock(Stream source, byte[] buffer)
+        {
+            int filled = 0;
+            while (filled < buffer.Length)
+            {
+                int read = source.Read(buffer, filled, buffer.Length - filled);
+                if (read == 0)
+                    break;
+                filled += read;
+            }
+
+            // Without this the tail of a short final block would still hold the previous block's bytes.
+            Array.Clear(buffer, filled, buffer.Length - filled);
+            return filled;
+        }
+
+        /// <summary>Returns <paramref name="part"/> as a percentage of <paramref name="total"/>, or 0 when total is 0.</summary>
+        private static double Percentage(ulong part, ulong total)
+        {
+            return total == 0 ? 0.0 : part * 100.0 / total;
+        }
+
+        /// <summary>Prints the banner and the command-line usage.</summary>
+        public static void ShowHelp()
+        {
+            Console.WriteLine("DedupStat - Shows an estimation of deduplication advantages for specified block size.");
+            Console.WriteLine("© 2014 Natalia Portillo");
+            Console.WriteLine();
+            Console.WriteLine("Usage: dedupstat [-v] <blocksize> <path>");
+            Console.WriteLine("\t-v\t\tPrint per-file details and read errors");
+            Console.WriteLine("\t<blocksize>\tBlock size in bytes, must be multiple of 512");
+            Console.WriteLine("\t<path>\t\tFolder path");
+        }
+
+        /// <summary>Computes the SHA-1 digest of a block.</summary>
+        /// <param name="block">Bytes to hash; the whole array is hashed.</param>
+        /// <returns>The digest as 40 upper-case hexadecimal characters.</returns>
+        private static string CalculateSHA1(byte[] block)
+        {
+            // BitConverter yields "AA-BB-..."; dropping the dashes gives the same text as "{0:X2}" per byte.
+            return BitConverter.ToString(hasher.ComputeHash(block)).Replace("-", "");
+        }
+    }
+}
diff --git a/DedupStat/Properties/AssemblyInfo.cs b/DedupStat/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..1a99392
--- /dev/null
+++ b/DedupStat/Properties/AssemblyInfo.cs
@@ -0,0 +1,22 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+
+// Information about this assembly is defined by the following attributes.
+// Change them to the values specific to your project.
+[assembly: AssemblyTitle("DedupStat")]
+[assembly: AssemblyDescription("Shows an estimation of deduplication advantages for specified block size.")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("Claunia.com")]
+[assembly: AssemblyProduct("DedupStat")]
+[assembly: AssemblyCopyright("© Claunia.com")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+// The assembly version has the format "{Major}.{Minor}.{Build}.{Revision}".
+// The form "{Major}.{Minor}.*" will automatically update the build and revision,
+// and "{Major}.{Minor}.{Build}.*" will update just the revision.
+[assembly: AssemblyVersion("1.0.*")]
+// The following attributes are used to specify the signing key for the assembly,
+// if desired. See the Mono documentation for more information about signing.
+//[assembly: AssemblyDelaySign(false)]
+//[assembly: AssemblyKeyFile("")]
+