[ALL] Add game dedupe

This commit is contained in:
Matt Nadareski
2017-08-29 11:46:01 -07:00
parent 66524bf034
commit e7683ec0fb
14 changed files with 93 additions and 65 deletions

View File

@@ -404,7 +404,7 @@ namespace RombaSharp
DatFile datroot = new DatFile { Type = "SuperDAT", };
// TODO: All instances of Hash.DeepHashes should be made into 0x0 eventually
datroot.PopulateFromDir(_dats, Hash.DeepHashes, false, false, false, SkipFileType.None, false, false, _tmpdir, false, null);
datroot.BucketBy(SortedBy.SHA1, false /* mergeroms */);
datroot.BucketBy(SortedBy.SHA1, DedupeType.None);
// Create a List of dat hashes in the database (SHA-1)
List<string> databaseDats = new List<string>();
@@ -434,7 +434,7 @@ namespace RombaSharp
unneeded.Add(hash);
}
}
datroot.BucketBy(SortedBy.Game, false /* mergeroms */, norename: true);
datroot.BucketBy(SortedBy.Game, DedupeType.None, norename: true);
Globals.Logger.User("Populating complete in {0}", DateTime.Now.Subtract(start).ToString(@"hh\:mm\:ss\.fffff"));
@@ -619,7 +619,7 @@ namespace RombaSharp
DatFile depot = new DatFile();
// TODO: All instances of Hash.DeepHashes should be made into 0x0 eventually
depot.PopulateFromDir(depotname, Hash.DeepHashes, false, false, true, SkipFileType.None, false, false, _tmpdir, false, null);
depot.BucketBy(SortedBy.SHA1, false /* mergeroms */);
depot.BucketBy(SortedBy.SHA1, DedupeType.None);
// Set the base queries to use
string crcquery = "INSERT OR IGNORE INTO crc (crc) VALUES";

View File

@@ -210,6 +210,16 @@
#region DatFile related
/// <summary>
/// Determines the DAT deduplication type
/// </summary>
/// <remarks>
/// Passed to DatFile.BucketBy to decide whether the items in each bucket are merged
/// after sorting. On the command line, -dd/--dedup selects Full and -gdd/--game-dedup
/// selects Game; the default is None.
/// </remarks>
public enum DedupeType
{
// No deduplication: bucketing never merges items
None = 0,
// Merge duplicate items in every bucket, whatever key is being bucketed by
Full,
// Merge duplicate items only when bucketing by game (SortedBy.Game), so that
// duplicates are removed within each game rather than across the entire DAT
Game
}
/// <summary>
/// Determines forcemerging tag for DAT output
/// </summary>

View File

@@ -29,7 +29,7 @@ namespace SabreTools.Library.Dats
private ForcePacking _forcePacking;
private DatFormat _datFormat;
private bool _excludeOf;
private bool _mergeRoms;
private DedupeType _dedupeRoms;
private Hash _stripHash;
private bool _oneGameOneRegion;
private List<string> _regions = new List<string>();
@@ -162,10 +162,10 @@ namespace SabreTools.Library.Dats
get { return _excludeOf; }
set { _excludeOf = value; }
}
public bool MergeRoms
public DedupeType DedupeRoms
{
get { return _mergeRoms; }
set { _mergeRoms = value; }
get { return _dedupeRoms; }
set { _dedupeRoms = value; }
}
public Hash StripHash
{
@@ -548,7 +548,7 @@ namespace SabreTools.Library.Dats
_forcePacking = datFile.ForcePacking;
_excludeOf = datFile.ExcludeOf;
_datFormat = datFile.DatFormat;
_mergeRoms = datFile.MergeRoms;
_dedupeRoms = datFile.DedupeRoms;
_stripHash = datFile.StripHash;
_sortedBy = SortedBy.Default;
_useGame = datFile.UseGame;

View File

@@ -351,7 +351,7 @@ namespace SabreTools.Library.Dats
&& ((_itemType == ItemType.Rom && !String.IsNullOrEmpty(((Rom)this).SHA512))
|| (_itemType == ItemType.Disk && !String.IsNullOrEmpty(((Disk)this).SHA512))))
{
datdata.BucketBy(SortedBy.SHA512, false /* mergeroms */);
datdata.BucketBy(SortedBy.SHA512, DedupeType.None);
}
// If all items are supposed to have a SHA-384, we sort by that
@@ -359,7 +359,7 @@ namespace SabreTools.Library.Dats
&& ((_itemType == ItemType.Rom && !String.IsNullOrEmpty(((Rom)this).SHA384))
|| (_itemType == ItemType.Disk && !String.IsNullOrEmpty(((Disk)this).SHA384))))
{
datdata.BucketBy(SortedBy.SHA384, false /* mergeroms */);
datdata.BucketBy(SortedBy.SHA384, DedupeType.None);
}
// If all items are supposed to have a SHA-256, we sort by that
@@ -370,12 +370,12 @@ namespace SabreTools.Library.Dats
if (_itemType == ItemType.Rom)
{
key = ((Rom)this).SHA256;
datdata.BucketBy(SortedBy.SHA256, false /* mergeroms */);
datdata.BucketBy(SortedBy.SHA256, DedupeType.None);
}
else
{
key = ((Disk)this).SHA256;
datdata.BucketBy(SortedBy.SHA256, false /* mergeroms */);
datdata.BucketBy(SortedBy.SHA256, DedupeType.None);
}
}
@@ -387,12 +387,12 @@ namespace SabreTools.Library.Dats
if (_itemType == ItemType.Rom)
{
key = ((Rom)this).SHA1;
datdata.BucketBy(SortedBy.SHA1, false /* mergeroms */);
datdata.BucketBy(SortedBy.SHA1, DedupeType.None);
}
else
{
key = ((Disk)this).SHA1;
datdata.BucketBy(SortedBy.SHA1, false /* mergeroms */);
datdata.BucketBy(SortedBy.SHA1, DedupeType.None);
}
}
@@ -404,12 +404,12 @@ namespace SabreTools.Library.Dats
if (_itemType == ItemType.Rom)
{
key = ((Rom)this).MD5;
datdata.BucketBy(SortedBy.MD5, false /* mergeroms */);
datdata.BucketBy(SortedBy.MD5, DedupeType.None);
}
else
{
key = ((Disk)this).MD5;
datdata.BucketBy(SortedBy.MD5, false /* mergeroms */);
datdata.BucketBy(SortedBy.MD5, DedupeType.None);
}
}
}
@@ -491,21 +491,21 @@ namespace SabreTools.Library.Dats
if (_itemType == ItemType.Disk)
{
key = ((Disk)this).MD5;
datdata.BucketBy(SortedBy.MD5, false /* mergeroms */);
datdata.BucketBy(SortedBy.MD5, DedupeType.None);
}
// If we've gotten here and we have a Rom, sort by CRC
else if (_itemType == ItemType.Rom)
{
key = ((Rom)this).CRC;
datdata.BucketBy(SortedBy.CRC, false /* mergeroms */);
datdata.BucketBy(SortedBy.CRC, DedupeType.None);
}
// Otherwise, we use -1 as the key
else
{
key = "-1";
datdata.BucketBy(SortedBy.Size, false /* mergeroms */);
datdata.BucketBy(SortedBy.Size, DedupeType.None);
}
}

View File

@@ -119,7 +119,7 @@ namespace SabreTools.Library.Dats
datHeaders[i] = new DatFile
{
DatFormat = (DatFormat != 0 ? DatFormat : 0),
MergeRoms = MergeRoms,
DedupeRoms = DedupeRoms,
};
datHeaders[i].Parse(input.Split('¬')[0], i, 0, splitType, keep: true, clean: clean, remUnicode: remUnicode, descAsName: descAsName);
@@ -187,7 +187,7 @@ namespace SabreTools.Library.Dats
Globals.Logger.User("Populating base DAT complete in {0}", DateTime.Now.Subtract(start).ToString(@"hh\:mm\:ss\.fffff"));
// For comparison's sake, we want to use CRC as the base ordering
BucketBy(SortedBy.CRC, true);
BucketBy(SortedBy.CRC, DedupeType.Full);
// Now we want to compare each input DAT against the base
List<string> inputFileNames = FileTools.GetOnlyFilesFromInputs(inputPaths, appendparent: true);
@@ -203,7 +203,7 @@ namespace SabreTools.Library.Dats
intDat.Parse(splitpath[0], 1, 1, keep: true, clean: clean, remUnicode: remUnicode, descAsName: descAsName);
// For comparison's sake, we want to use CRC as the base ordering
intDat.BucketBy(SortedBy.CRC, true);
intDat.BucketBy(SortedBy.CRC, DedupeType.Full);
// Then we do a hashwise comparison against the base DAT
List<string> keys = intDat.Keys.ToList();

View File

@@ -25,10 +25,10 @@ namespace SabreTools.Library.Dats
/// Take the arbitrarily sorted Files Dictionary and convert to one sorted by a user-defined method
/// </summary>
/// <param name="bucketBy">SortedBy enum representing how to sort the individual items</param>
/// <param name="mergeroms">True if roms should be deduped, false otherwise</param>
/// <param name="deduperoms">Dedupe type that should be used</param>
/// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
public void BucketBy(SortedBy bucketBy, bool mergeroms, bool lower = true, bool norename = true)
public void BucketBy(SortedBy bucketBy, DedupeType deduperoms, bool lower = true, bool norename = true)
{
// If we already have the right sorting, trust it
if (_sortedBy == bucketBy)
@@ -48,7 +48,7 @@ namespace SabreTools.Library.Dats
// Create the temporary dictionary to sort into
SortedDictionary<string, List<DatItem>> sortable = new SortedDictionary<string, List<DatItem>>();
Globals.Logger.User("Organizing roms by {0}" + (mergeroms ? " and merging" : ""), bucketBy);
Globals.Logger.User("Organizing roms by {0}" + (deduperoms != DedupeType.None ? " and merging" : ""), bucketBy);
// First do the initial sort of all of the roms
List<string> keys = Keys.ToList();
@@ -152,7 +152,7 @@ namespace SabreTools.Library.Dats
DatItem.Sort(ref sortedlist, false);
// If we're merging the roms, do so
if (mergeroms)
if (deduperoms == DedupeType.Full || (deduperoms == DedupeType.Game && bucketBy == SortedBy.Game))
{
sortedlist = DatItem.Merge(sortedlist);
}
@@ -384,8 +384,8 @@ namespace SabreTools.Library.Dats
/// <summary>
/// Use cdevice_ref tags to get full non-merged sets and remove parenting tags
/// </summary>
/// <param name="mergeroms">True if roms should be deduped, false otherwise</param>
public void CreateDeviceNonMergedSets(bool mergeroms)
/// <param name="mergeroms">Dedupe type to be used</param>
public void CreateDeviceNonMergedSets(DedupeType mergeroms)
{
Globals.Logger.User("Creating device non-merged sets from the DAT");
@@ -406,8 +406,8 @@ namespace SabreTools.Library.Dats
/// <summary>
/// Use cloneof tags to create non-merged sets and remove the tags plus using the device_ref tags to get full sets
/// </summary>
/// <param name="mergeroms">True if roms should be deduped, false otherwise</param>
public void CreateFullyNonMergedSets(bool mergeroms)
/// <param name="mergeroms">Dedupe type to be used</param>
public void CreateFullyNonMergedSets(DedupeType mergeroms)
{
Globals.Logger.User("Creating fully non-merged sets from the DAT");
@@ -432,8 +432,8 @@ namespace SabreTools.Library.Dats
/// <summary>
/// Use cloneof tags to create merged sets and remove the tags
/// </summary>
/// <param name="mergeroms">True if roms should be deduped, false otherwise</param>
public void CreateMergedSets(bool mergeroms)
/// <param name="mergeroms">Dedupe type to be used</param>
public void CreateMergedSets(DedupeType mergeroms)
{
Globals.Logger.User("Creating merged sets from the DAT");
@@ -454,8 +454,8 @@ namespace SabreTools.Library.Dats
/// <summary>
/// Use cloneof tags to create non-merged sets and remove the tags
/// </summary>
/// <param name="mergeroms">True if roms should be deduped, false otherwise</param>
public void CreateNonMergedSets(bool mergeroms)
/// <param name="mergeroms">Dedupe type to be used</param>
public void CreateNonMergedSets(DedupeType mergeroms)
{
Globals.Logger.User("Creating non-merged sets from the DAT");
@@ -476,8 +476,8 @@ namespace SabreTools.Library.Dats
/// <summary>
/// Use cloneof and romof tags to create split sets and remove the tags
/// </summary>
/// <param name="mergeroms">True if roms should be deduped, false otherwise</param>
public void CreateSplitSets(bool mergeroms)
/// <param name="mergeroms">Dedupe type to be used</param>
public void CreateSplitSets(DedupeType mergeroms)
{
Globals.Logger.User("Creating split sets from the DAT");

View File

@@ -148,7 +148,7 @@ namespace SabreTools.Library.Dats
}
// Now that we have a list of depots, we want to sort the input DAT by SHA-1
BucketBy(SortedBy.SHA1, false /* mergeroms */);
BucketBy(SortedBy.SHA1, DedupeType.None);
// Then we want to loop through each of the hashes and see if we can rebuild
List<string> hashes = Keys.ToList();
@@ -884,7 +884,7 @@ namespace SabreTools.Library.Dats
}
// Now that we have a list of depots, we want to sort the input DAT by SHA-1
BucketBy(SortedBy.SHA1, false /* mergeroms */);
BucketBy(SortedBy.SHA1, DedupeType.None);
// Then we want to loop through each of the hashes and see if we can rebuild
List<string> hashes = Keys.ToList();
@@ -994,7 +994,7 @@ namespace SabreTools.Library.Dats
if (hashOnly)
{
// First we need to sort by hash to get duplicates
BucketBy(SortedBy.SHA1, false /* mergeroms */);
BucketBy(SortedBy.SHA1, DedupeType.None);
// Then follow the same tactics as before
foreach (string key in Keys)

View File

@@ -155,7 +155,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile sha512 = new DatFile
{
@@ -176,7 +176,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile sha384 = new DatFile
{
@@ -197,7 +197,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile sha256 = new DatFile
{
@@ -218,7 +218,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile sha1 = new DatFile
{
@@ -239,7 +239,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile md5 = new DatFile
{
@@ -260,7 +260,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile crc = new DatFile
{
@@ -281,7 +281,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile other = new DatFile
{
@@ -302,7 +302,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
// Now populate each of the DAT objects in turn
@@ -405,7 +405,7 @@ namespace SabreTools.Library.Dats
basepath = (basepath.EndsWith(Path.DirectorySeparatorChar.ToString()) ? basepath : basepath + Path.DirectorySeparatorChar);
// First, organize by games so that we can do the right thing
BucketBy(SortedBy.Game, false /* mergeroms */, lower: false, norename: true);
BucketBy(SortedBy.Game, DedupeType.None, lower: false, norename: true);
// Create a temporary DAT to add things to
DatFile tempDat = new DatFile(this)
@@ -537,7 +537,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile diskdat = new DatFile
{
@@ -558,7 +558,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
DatFile sampledat = new DatFile
{
@@ -579,7 +579,7 @@ namespace SabreTools.Library.Dats
ForceNodump = this.ForceNodump,
ForcePacking = this.ForcePacking,
DatFormat = this.DatFormat,
MergeRoms = this.MergeRoms,
DedupeRoms = this.DedupeRoms,
};
// Now populate each of the DAT objects in turn

View File

@@ -205,7 +205,7 @@ namespace SabreTools.Library.Dats
RecalculateStats();
}
BucketBy(SortedBy.Game, false /* mergeroms */, norename: true);
BucketBy(SortedBy.Game, DedupeType.None, norename: true);
if (_totalSize < 0)
{
_totalSize = Int64.MaxValue + _totalSize;
@@ -502,7 +502,7 @@ namespace SabreTools.Library.Dats
List<string> games = new List<string>();
DatFile datdata = new DatFile();
datdata.Parse(filename.Item1, 0, 0);
datdata.BucketBy(SortedBy.Game, false /* mergeroms */, norename: true);
datdata.BucketBy(SortedBy.Game, DedupeType.None, norename: true);
// Output single DAT stats (if asked)
Globals.Logger.User("Adding stats for file '{0}'\n", false, filename.Item1);

View File

@@ -105,14 +105,18 @@ namespace SabreTools.Library.Dats
recalculate: (RomCount + DiskCount == 0), baddumpCol: true, nodumpCol: true);
}
// First bucket by CRC to dedupe if required
if (MergeRoms)
// Bucket and dedupe according to the flag
if (DedupeRoms == DedupeType.Full)
{
BucketBy(SortedBy.CRC, MergeRoms, norename: norename);
BucketBy(SortedBy.CRC, DedupeRoms, norename: norename);
}
else if (DedupeRoms == DedupeType.Game)
{
BucketBy(SortedBy.Game, DedupeRoms, norename: norename);
}
// Bucket roms by game name
BucketBy(SortedBy.Game, false /* mergeRoms */, norename: norename);
// Bucket roms by game name, if not already
BucketBy(SortedBy.Game, DedupeType.None, norename: norename);
// Output the number of items we're going to be writing
Globals.Logger.User("A total of {0} items will be written out to '{1}'", Count, FileName);

View File

@@ -1102,6 +1102,11 @@ Options:
For all outputted DATs, allow for hash deduping. This makes sure
that there are effectively no duplicates in the output files.
-gdd, --game-dedup Enable deduping within games in the created DAT
For all outputted DATs, allow for hash deduping but only within the
games, and not across the entire DAT. This makes sure that there
are effectively no duplicates within each of the output sets.
-m, --merge Merge the input DATs
By default, all DATs are processed individually with the user-
specified flags. With this flag enabled, all of the input DATs are

View File

@@ -1130,6 +1130,11 @@ namespace SabreTools
"Enable deduping in the created DAT",
FeatureType.Flag,
null));
update.AddFeature("game-dedup", new Feature(
new List<string>() { "-gdd", "--game-dedup" },
"Enable in-game deduping in the created DAT",
FeatureType.Flag,
null));
update.AddFeature("merge", new Feature(
new List<string>() { "-m", "--merge" },
"Merge the input DATs",

View File

@@ -497,7 +497,7 @@ namespace SabreTools
/// <param name="clean">True to clean the game names to WoD standard, false otherwise (default)</param>
/// <param name="remUnicode">True if we should remove non-ASCII characters from output, false otherwise (default)</param>
/// <param name="descAsName">True if descriptions should be used as names, false otherwise (default)</param>
/// <param name="dedup">True to dedupe the roms in the DAT, false otherwise (default)</param>
/// <param name="dedup">Dedupe type to use for DAT processing</param>
/// <param name="stripHash">StripHash that represents the hash(es) that you want to remove from the output</param>
private static void InitUpdate(
List<string> inputPaths,
@@ -558,7 +558,7 @@ namespace SabreTools
bool clean,
bool remUnicode,
bool descAsName,
bool dedup,
DedupeType dedup,
Hash stripHash)
{
// Set the special flags
@@ -668,11 +668,11 @@ namespace SabreTools
}
if (name == "")
{
name = (diffMode != 0 ? "DiffDAT" : "MergeDAT") + (superdat ? "-SuperDAT" : "") + (dedup ? "-deduped" : "");
name = (diffMode != 0 ? "DiffDAT" : "MergeDAT") + (superdat ? "-SuperDAT" : "") + (dedup != DedupeType.None ? "-deduped" : "");
}
if (description == "")
{
description = (diffMode != 0 ? "DiffDAT" : "MergeDAT") + (superdat ? "-SuperDAT" : "") + (dedup ? " - deduped" : "");
description = (diffMode != 0 ? "DiffDAT" : "MergeDAT") + (superdat ? "-SuperDAT" : "") + (dedup != DedupeType.None ? " - deduped" : "");
if (!bare)
{
description += " (" + date + ")";
@@ -708,7 +708,7 @@ namespace SabreTools
ForceMerging = fm,
ForceNodump = fn,
ForcePacking = fp,
MergeRoms = dedup,
DedupeRoms = dedup,
ExcludeOf = excludeOf,
DatFormat = datFormat,
StripHash = stripHash,

View File

@@ -95,7 +95,6 @@ namespace SabreTools
cleanGameNames = false,
copyFiles = false,
datPrefix = false,
dedup = false,
delete = false,
descAsName = false,
enableGzip = false,
@@ -122,6 +121,7 @@ namespace SabreTools
updateDat = false,
usegame = true;
DatFormat datFormat = 0x0;
DedupeType dedup = DedupeType.None;
DiffMode diffMode = 0x0;
Hash omitFromScan = Hash.SHA256 | Hash.SHA384 | Hash.SHA512; // Should be set to 0x0 later
Hash stripHash = 0x0;
@@ -326,7 +326,7 @@ namespace SabreTools
break;
case "-dd":
case "--dedup":
dedup = true;
dedup = DedupeType.Full;
break;
case "-del":
case "--delete":
@@ -372,6 +372,10 @@ namespace SabreTools
case "--files":
parseArchivesAsFiles = true;
break;
case "-gdd":
case "--game-dedup":
dedup = DedupeType.Game;
break;
case "-gp":
case "--game-prefix":
datPrefix = true;