[DATFromDir] Add copy files flag

This flag forces files to be copied to the temporary directory before being parsed. This can decrease the parsing time if the temp directory is on an SSD.
This commit is contained in:
Matt Nadareski
2016-09-21 21:16:53 -07:00
parent 39a9797e13
commit bd049f12ec
7 changed files with 42 additions and 19 deletions

View File

@@ -71,7 +71,7 @@ namespace SabreTools
foreach (string input in inputs)
{
DATFromDir dfd = new DATFromDir(input, datdata, false /* noMD5 */, false /* noSHA1 */, true /* bare */, false /* archivesAsFiles */,
true /* enableGzip */, false /* addBlanks */, false /* addDate */, "__temp__" /* tempDir */, 4 /* maxDegreeOfParallelism */, _logger);
true /* enableGzip */, false /* addBlanks */, false /* addDate */, "__temp__" /* tempDir */, false /* copyFiles */, 4 /* maxDegreeOfParallelism */, _logger);
dfd.Start();
DatFile.WriteDatfile(dfd.DatData, "", logger);
}

View File

@@ -131,6 +131,7 @@ namespace SabreTools.Helper
helptext.Add(" -sd, --superdat Enable SuperDAT creation");
helptext.Add(" -ab, --add-blank Output blank files for folders");
helptext.Add(" -ad, --add-date Output dates for each file parsed");
helptext.Add(" -cf, --copy-files Copy files to the temp directory before parsing");
helptext.Add(" -t=, --temp= Set the temporary directory to use");
helptext.Add(" -mt={4} Amount of threads to use (-1 unlimted)");
helptext.Add(" -es, --ext-split Split a DAT by two file extensions");

View File

@@ -26,6 +26,7 @@ namespace SabreTools
private bool _enableGzip;
private bool _addBlanks;
private bool _addDate;
private bool _copyFiles;
private int _maxDegreeOfParallelism;
// Other required variables
@@ -50,10 +51,11 @@ namespace SabreTools
/// <param name="addBlanks">True if blank items should be created for empty folders, false otherwise</param>
/// <param name="addDate">True if dates should be archived for all files, false otherwise</param>
/// <param name="tempDir">Name of the directory to create a temp folder in (blank is current directory)</param>
/// <param name="copyFiles">True if files should be copied to the temp directory before hashing, false otherwise</param>
/// <param name="maxDegreeOfParallelism">Integer representing the maximum amount of parallelization to be used</param>
/// <param name="logger">Logger object for console and file output</param>
public DATFromDir(string basePath, DatFile datdata, bool noMD5, bool noSHA1, bool bare, bool archivesAsFiles,
bool enableGzip, bool addBlanks, bool addDate, string tempDir, int maxDegreeOfParallelism, Logger logger)
bool enableGzip, bool addBlanks, bool addDate, string tempDir, bool copyFiles, int maxDegreeOfParallelism, Logger logger)
{
_basePath = Path.GetFullPath(basePath);
_datdata = datdata;
@@ -67,6 +69,7 @@ namespace SabreTools
_addBlanks = addBlanks;
_addDate = addDate;
_tempDir = (String.IsNullOrEmpty(tempDir) ? Path.GetTempPath() : tempDir);
_copyFiles = copyFiles;
_maxDegreeOfParallelism = maxDegreeOfParallelism;
_logger = logger;
}
@@ -188,10 +191,18 @@ namespace SabreTools
// Define the temporary directory
string tempSubDir = Path.GetFullPath(Path.Combine(_tempDir, Path.GetRandomFileName())) + Path.DirectorySeparatorChar;
// If we're copying files, copy it first and get the new filename
string newitem = item;
if (_copyFiles)
{
newitem = Path.Combine(tempSubDir, Path.GetFileName(item));
File.Copy(item, newitem, true);
}
// Special case for if we are in Romba mode (all names are supposed to be SHA-1 hashes)
if (_datdata.Romba)
{
Rom rom = FileTools.GetTorrentGZFileInfo(item, _logger);
Rom rom = FileTools.GetTorrentGZFileInfo(newitem, _logger);
// If the rom is valid, write it out
if (rom.Name != null)
@@ -207,13 +218,13 @@ namespace SabreTools
}
_datdata.Files[key].Add(rom);
_logger.User("File added: " + Path.GetFileNameWithoutExtension(item) + Environment.NewLine);
_logger.User("File added: " + Path.GetFileNameWithoutExtension(newitem) + Environment.NewLine);
}
}
else
{
_logger.User("File not added: " + Path.GetFileNameWithoutExtension(item) + Environment.NewLine);
_logger.User("File not added: " + Path.GetFileNameWithoutExtension(newitem) + Environment.NewLine);
return;
}
@@ -223,30 +234,30 @@ namespace SabreTools
// If both deep hash skip flags are set, do a quickscan
if (_noMD5 && _noSHA1)
{
ArchiveType? type = FileTools.GetCurrentArchiveType(item, _logger);
ArchiveType? type = FileTools.GetCurrentArchiveType(newitem, _logger);
// If we have an archive, scan it
if (type != null && !_archivesAsFiles)
{
List<Rom> extracted = FileTools.GetArchiveFileInfo(item, _logger);
List<Rom> extracted = FileTools.GetArchiveFileInfo(newitem, _logger);
foreach (Rom rom in extracted)
{
ProcessFileHelper(item,
ProcessFileHelper(newitem,
rom,
_basePath,
(Path.GetDirectoryName(Path.GetFullPath(item)) + Path.DirectorySeparatorChar).Remove(0, _basePath.Length) + Path.GetFileNameWithoutExtension(item));
}
}
// Otherwise, just get the info on the file itself
else if (File.Exists(item))
else if (File.Exists(newitem))
{
ProcessFile(item, _basePath, "");
ProcessFile(newitem, _basePath, "");
}
}
// Otherwise, attempt to extract the files to the temporary directory
else
{
bool encounteredErrors = FileTools.ExtractArchive(item,
bool encounteredErrors = FileTools.ExtractArchive(newitem,
tempSubDir,
(_archivesAsFiles ? ArchiveScanLevel.External : ArchiveScanLevel.Internal),
(!_archivesAsFiles && _enableGzip ? ArchiveScanLevel.Internal : ArchiveScanLevel.External),
@@ -257,7 +268,7 @@ namespace SabreTools
// If the file was an archive and was extracted successfully, check it
if (!encounteredErrors)
{
_logger.Log(Path.GetFileName(item) + " treated like an archive");
_logger.Log(Path.GetFileName(newitem) + " treated like an archive");
List<string> extracted = Directory.EnumerateFiles(tempSubDir, "*", SearchOption.AllDirectories).ToList();
Parallel.ForEach(extracted,
new ParallelOptions { MaxDegreeOfParallelism = _maxDegreeOfParallelism },
@@ -268,13 +279,13 @@ namespace SabreTools
Path.Combine((_datdata.Type == "SuperDAT"
? (Path.GetDirectoryName(Path.GetFullPath(item)) + Path.DirectorySeparatorChar).Remove(0, _basePath.Length)
: ""),
Path.GetFileNameWithoutExtension(item)));
Path.GetFileNameWithoutExtension(newitem)));
});
}
// Otherwise, just get the info on the file itself
else if (File.Exists(item))
else if (File.Exists(newitem))
{
ProcessFile(item, _basePath, "");
ProcessFile(newitem, _basePath, "");
}
// Delete the sub temp directory

View File

@@ -138,7 +138,7 @@ namespace SabreTools.Helper
foreach (string input in _inputs)
{
DATFromDir dfd = new DATFromDir(input, _datdata, false /* noMD5 */, false /* noSHA1 */, true /* bare */, false /* archivesAsFiles */,
true /* enableGzip */, false /* addBlanks */, false /* addDate */, "" /* tempDir */, 4 /* maxDegreeOfParallelism */, _logger);
true /* enableGzip */, false /* addBlanks */, false /* addDate */, "" /* tempDir */, false /* copyFiles */, 4 /* maxDegreeOfParallelism */, _logger);
dfd.Start();
_datdata = dfd.DatData;
}

View File

@@ -231,6 +231,10 @@ Options:
If this flag is set, then the Date will be appended to each file information
in the output DAT. The output format is standardized as "yyyy/MM/dd HH:mm:ss".
-cf, --copy-files Copy files to the temp directory before parsing
If this flag is set, then all files that are going to be parsed are copied to the
temporary directory before being hashed. This can be helpful in cases where the
temp folder is located on an SSD and the user wants to take advantage of this.
-t=, --temp= Set the name of the temporary directory
Optionally, a temp folder can be supplied in the case the default temp directory
(inside the running folder) is not preferred. This is used for any operations

View File

@@ -70,6 +70,7 @@ namespace SabreTools
/// <param name="addBlanks">True if blank items should be created for empty folders, false otherwise</param>
/// <param name="addDate">True if dates should be archived for all files, false otherwise</param>
/// <param name="tempDir">Name of the directory to create a temp folder in (blank is current directory)</param>
/// <param name="copyFiles">True if files should be copied to the temp directory before hashing, false otherwise</param>
/// <param name="maxDegreeOfParallelism">Integer representing the maximum amount of parallelization to be used</param>
private static void InitDatFromDir(List<string> inputs,
string filename,
@@ -90,6 +91,7 @@ namespace SabreTools
bool addBlanks,
bool addDate,
string tempDir,
bool copyFiles,
int maxDegreeOfParallelism)
{
// Create a new DATFromDir object and process the inputs
@@ -119,7 +121,7 @@ namespace SabreTools
datdata.Files = new Dictionary<string, List<DatItem>>();
string basePath = Path.GetFullPath(path);
DATFromDir dfd = new DATFromDir(basePath, datdata, noMD5, noSHA1, bare, archivesAsFiles, enableGzip, addBlanks, addDate, tempDir, maxDegreeOfParallelism, _logger);
DATFromDir dfd = new DATFromDir(basePath, datdata, noMD5, noSHA1, bare, archivesAsFiles, enableGzip, addBlanks, addDate, tempDir, copyFiles, maxDegreeOfParallelism, _logger);
bool success = dfd.Start();
// If it was a success, write the DAT out

View File

@@ -54,6 +54,7 @@ namespace SabreTools
archivesAsFiles = false,
bare = false,
clean = false,
copyFiles = false,
datfromdir = false,
datprefix = false,
dedup = false,
@@ -151,6 +152,10 @@ namespace SabreTools
case "--cascade":
cascade = true;
break;
case "-cf":
case "--copy-files":
copyFiles = true;
break;
case "-csv":
case "--csv":
tsv = false;
@@ -538,8 +543,8 @@ namespace SabreTools
// Create a DAT from a directory or set of directories
if (datfromdir)
{
InitDatFromDir(inputs, filename, name, description, category, version, author, forceunpack, outputFormat,
romba, superdat, noMD5, noSHA1, bare, archivesAsFiles, enableGzip, addBlanks, addDate, tempDir, maxParallelism);
InitDatFromDir(inputs, filename, name, description, category, version, author, forceunpack, outputFormat, romba,
superdat, noMD5, noSHA1, bare, archivesAsFiles, enableGzip, addBlanks, addDate, tempDir, copyFiles, maxParallelism);
}
// Split a DAT by extension