Compare commits

...

4 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
f59b14a278 WIP: Implement compressed tar detection in TarFactory (seeking clarification)
Enhanced TarFactory.IsArchive() to detect compressed tar formats (tar.bz2, tar.lz, etc.) by:
- Checking if stream contains tar header directly (for uncompressed)
- Testing each compression format to see if it contains a tar file

Enhanced TarFactory.Open() methods to decompress compressed tar files to MemoryStream for seekable access.

Current blockers:
- Some compression stream constructors (LZipStream) don't support leaveOpen parameter
- Stream disposal during detection phase causes issues with factory detection flow

Awaiting clarification from @adamhathcock on preferred architecture approach.

Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2025-12-09 16:39:41 +00:00
copilot-swe-agent[bot]
3870cc8d34 Improve technical accuracy of error messages
Clarify that the Archive API requires seekable streams, but decompression streams are not seekable, rather than stating that decompression requires random access.

Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2025-12-09 08:41:37 +00:00
copilot-swe-agent[bot]
242e442a8c Add helpful error messages for compressed tar archives in ArchiveFactory
Detect when users try to open tar.bz2, tar.lz, and other compressed tar formats with ArchiveFactory.Open() and provide clear guidance to use ReaderFactory.Open() instead. These formats require forward-only reading due to decompression stream limitations and cannot be used with the random-access Archive API.

Includes tests to verify the helpful error messages are shown.

Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2025-12-09 08:36:50 +00:00
copilot-swe-agent[bot]
d95d1e928b Initial plan 2025-12-09 08:20:53 +00:00
3 changed files with 410 additions and 7 deletions

View File

@@ -3,6 +3,10 @@ using System.Collections.Generic;
using System.IO;
using System.Linq;
using SharpCompress.Common;
using SharpCompress.Common.Tar.Headers;
using SharpCompress.Compressors;
using SharpCompress.Compressors.BZip2;
using SharpCompress.Compressors.LZMA;
using SharpCompress.Factories;
using SharpCompress.IO;
using SharpCompress.Readers;
@@ -131,10 +135,10 @@ public static class ArchiveFactory
{
finfo.NotNull(nameof(finfo));
using Stream stream = finfo.OpenRead();
return FindFactory<T>(stream);
return FindFactory<T>(stream, finfo.Name);
}
private static T FindFactory<T>(Stream stream)
private static T FindFactory<T>(Stream stream, string? fileName = null)
where T : IFactory
{
stream.NotNull(nameof(stream));
@@ -159,6 +163,16 @@ public static class ArchiveFactory
}
}
stream.Seek(startPosition, SeekOrigin.Begin);
// Check if this is a compressed tar file (tar.bz2, tar.lz, etc.)
// These formats are supported by ReaderFactory but not by ArchiveFactory
var compressedTarMessage = TryGetCompressedTarMessage(stream, fileName);
if (compressedTarMessage != null)
{
throw new InvalidOperationException(compressedTarMessage);
}
var extensions = string.Join(", ", factories.Select(item => item.Name));
throw new InvalidOperationException(
@@ -248,4 +262,111 @@ public static class ArchiveFactory
}
public static IArchiveFactory AutoFactory { get; } = new AutoArchiveFactory();
/// <summary>
/// File-name suffixes treated as "compressed tar" when the content itself could not be identified.
/// </summary>
private static readonly string[] CompressedTarExtensions =
{
    ".tar.bz2",
    ".tbz",
    ".tbz2",
    ".tb2",
    ".tz2",
    ".tar.lz",
    ".tar.xz",
    ".txz",
    ".tar.zst",
    ".tar.zstd",
    ".tzst",
    ".tzstd",
    ".tar.z",
    ".tz",
    ".taz",
};

/// <summary>
/// Checks if the stream is a compressed tar file (tar.bz2, tar.lz, etc.) that should use ReaderFactory instead.
/// </summary>
/// <param name="stream">
/// Stream to inspect. When it is seekable its position is restored (best effort) before returning;
/// when it is not seekable the content is not inspected and only <paramref name="fileName"/> is used.
/// </param>
/// <param name="fileName">Optional file name used for the extension-based fallback.</param>
/// <returns>An explanatory error message if a compressed tar is detected, <c>null</c> otherwise.</returns>
private static string? TryGetCompressedTarMessage(Stream stream, string? fileName)
{
    // Shared tail of every message; the leading space separates it from the format-specific sentence.
    const string guidance =
        " The Archive API requires seekable streams, but decompression streams are not seekable. "
        + "Please use ReaderFactory.Open() instead for forward-only extraction, "
        + "or decompress the file first and then open the resulting tar file with ArchiveFactory.Open().";

    // Reading Position / seeking on a non-seekable stream would throw and hide the
    // "unsupported format" error the caller is about to raise, so sniff content only when seekable.
    if (stream.CanSeek)
    {
        var startPosition = stream.Position;
        try
        {
            // NOTE(review): the probe decompressors below are deliberately NOT disposed.
            // Disposing them is presumed to close the caller's stream (LZipStream is constructed
            // without any leave-open option, and the 'true' passed to BZip2Stream should be
            // confirmed to be a leave-open flag). A closed stream would defeat the position
            // restore in the finally block and the ReaderFactory retry the message recommends.

            // Check if it's a BZip2 file
            if (BZip2Stream.IsBZip2(stream))
            {
                stream.Seek(startPosition, SeekOrigin.Begin);
                var bzip2Probe = new BZip2Stream(stream, CompressionMode.Decompress, true);
                return IsTarStream(bzip2Probe)
                    ? "This appears to be a tar.bz2 archive." + guidance
                    : null;
            }

            stream.Seek(startPosition, SeekOrigin.Begin);

            // Check if it's an LZip file
            if (LZipStream.IsLZipFile(stream))
            {
                stream.Seek(startPosition, SeekOrigin.Begin);
                var lzipProbe = new LZipStream(stream, CompressionMode.Decompress);
                return IsTarStream(lzipProbe)
                    ? "This appears to be a tar.lz archive." + guidance
                    : null;
            }
        }
        catch
        {
            // If we can't determine, just return null and let the normal error handling proceed
            return null;
        }
        finally
        {
            try
            {
                stream.Seek(startPosition, SeekOrigin.Begin);
            }
            catch
            {
                // Best effort only: this helper runs on an error path and must not replace
                // the caller's exception with a seek failure.
            }
        }
    }

    // Check file extension as a fallback for other compressed tar formats.
    // Ordinal, case-insensitive matching keeps the result independent of the current culture.
    if (
        fileName != null
        && CompressedTarExtensions.Any(extension =>
            fileName.EndsWith(extension, StringComparison.OrdinalIgnoreCase)
        )
    )
    {
        return $"The file '{fileName}' appears to be a compressed tar archive." + guidance;
    }

    return null;
}
/// <summary>
/// Reports whether a tar header can be read from the current position of <paramref name="stream"/>.
/// Any exception raised while reading is treated as "not a tar stream".
/// </summary>
private static bool IsTarStream(Stream stream)
{
    bool headerWasRead;
    try
    {
        var header = new TarHeader(new ArchiveEncoding());
        // The reader is not disposed here, so this method itself never disposes the stream.
        var reader = new BinaryReader(stream);
        headerWasRead = header.Read(reader);
    }
    catch
    {
        headerWasRead = false;
    }

    return headerWasRead;
}
}

View File

@@ -57,19 +57,238 @@ public class TarFactory
Stream stream,
string? password = null,
int bufferSize = ReaderOptions.DefaultBufferSize
) => TarArchive.IsTarFile(stream);
)
{
if (!stream.CanSeek)
{
return TarArchive.IsTarFile(stream); // For non-seekable streams, just check if it's a tar file
}
var startPosition = stream.Position;
// First check if it's a regular tar file
if (TarArchive.IsTarFile(stream))
{
stream.Seek(startPosition, SeekOrigin.Begin); // Seek back for consistency
return true;
}
// Seek back after the tar file check
stream.Seek(startPosition, SeekOrigin.Begin);
if (compressionOptions == null)
{
return false;
}
try
{
// Try each compression option to see if it contains a tar file
foreach (var testOption in compressionOptions)
{
if (testOption.Type == CompressionType.None)
{
continue; // Skip uncompressed
}
stream.Seek(startPosition, SeekOrigin.Begin);
try
{
if (testOption.CanHandle(stream))
{
stream.Seek(startPosition, SeekOrigin.Begin);
// Try to decompress and check if it contains a tar archive
// For compression formats that don't support leaveOpen, we need to save/restore position
var positionBeforeDecompress = stream.Position;
Stream? decompressedStream = null;
bool streamWasClosed = false;
try
{
decompressedStream = testOption.Type switch
{
CompressionType.BZip2 => new BZip2Stream(stream, CompressionMode.Decompress, true),
_ => testOption.CreateStream(stream) // For other types, may close the stream
};
if (TarArchive.IsTarFile(decompressedStream))
{
return true;
}
}
catch (ObjectDisposedException)
{
streamWasClosed = true;
throw; // Stream was closed, can't continue
}
finally
{
decompressedStream?.Dispose();
if (!streamWasClosed && stream.CanSeek)
{
try
{
stream.Seek(positionBeforeDecompress, SeekOrigin.Begin);
}
catch
{
// If seek fails, the stream might have been closed
}
}
}
// Seek back to start after decompression attempt
stream.Seek(startPosition, SeekOrigin.Begin);
}
}
catch
{
// If decompression fails, it's not this format - continue to next option
try
{
stream.Seek(startPosition, SeekOrigin.Begin);
}
catch
{
// Ignore seek failures
}
}
}
return false;
}
finally
{
try
{
stream.Seek(startPosition, SeekOrigin.Begin);
}
catch
{
// Ignore seek failures
}
}
}
#endregion
#region IArchiveFactory
/// <inheritdoc/>
public IArchive Open(Stream stream, ReaderOptions? readerOptions = null) =>
TarArchive.Open(stream, readerOptions);
/// <summary>
/// Opens a tar archive from <paramref name="stream"/>. When the stream is seekable, each known
/// compression option is probed first; if one matches and the decompressed content is a tar file,
/// the archive is opened over an in-memory copy of the decompressed data. Otherwise the stream is
/// handed to <c>TarArchive.Open</c> unchanged.
/// </summary>
/// <param name="stream">Source stream; compressed-tar detection is only attempted when it is seekable.</param>
/// <param name="readerOptions">Optional reader options; a default instance is used when null.</param>
/// <returns>The opened archive.</returns>
/// <remarks>
/// The whole decompressed payload is buffered in memory, so memory use grows with the uncompressed size.
/// NOTE(review): only ArchiveEncoding is carried over to the in-memory archive, and the caller's
/// LeaveStreamOpen setting is not consulted on that path - confirm this is intended.
/// </remarks>
public IArchive Open(Stream stream, ReaderOptions? readerOptions = null)
{
    readerOptions ??= new ReaderOptions();

    // Try to detect and handle compressed tar formats.
    // compressionOptions is null-checked the same way IsArchive does before enumerating it.
    if (stream.CanSeek && compressionOptions != null)
    {
        var startPosition = stream.Position;

        // Try each compression option to see if we can decompress it
        foreach (var testOption in compressionOptions)
        {
            if (testOption.Type == CompressionType.None)
            {
                continue; // Skip uncompressed
            }

            stream.Seek(startPosition, SeekOrigin.Begin);
            if (!testOption.CanHandle(stream))
            {
                continue;
            }

            stream.Seek(startPosition, SeekOrigin.Begin);

            var memoryStream = new MemoryStream();
            var ownershipTransferred = false;
            try
            {
                // Decompress the entire stream into a seekable MemoryStream
                using (var decompressedStream = testOption.CreateStream(stream))
                {
                    decompressedStream.CopyTo(memoryStream);
                }

                memoryStream.Position = 0;

                // Verify it's actually a tar file
                if (TarArchive.IsTarFile(memoryStream))
                {
                    memoryStream.Position = 0;

                    // Return a TarArchive from the decompressed memory stream
                    var options = new ReaderOptions
                    {
                        LeaveStreamOpen = false, // Ensure the MemoryStream is disposed with the archive
                        ArchiveEncoding = readerOptions?.ArchiveEncoding ?? new ArchiveEncoding()
                    };
                    var archive = TarArchive.Open(memoryStream, options);
                    ownershipTransferred = true;
                    return archive;
                }
            }
            finally
            {
                // Release the buffer on every path that does not hand it to an archive,
                // including decompression or header-check failures.
                if (!ownershipTransferred)
                {
                    memoryStream.Dispose();
                }
            }

            // Disposing the decompressor may have closed the input stream; if so, stop probing
            // instead of seeking a closed stream.
            if (!stream.CanSeek)
            {
                break;
            }
        }

        if (stream.CanSeek)
        {
            stream.Seek(startPosition, SeekOrigin.Begin);
        }
    }

    // Fall back to normal tar archive opening
    return TarArchive.Open(stream, readerOptions);
}
/// <inheritdoc/>
public IArchive Open(FileInfo fileInfo, ReaderOptions? readerOptions = null) =>
TarArchive.Open(fileInfo, readerOptions);
/// <summary>
/// Opens a tar archive from <paramref name="fileInfo"/>. For every compression option whose known
/// extensions match the file name and whose format check accepts the file, the file is decompressed
/// into memory; if the result is a tar file the archive is opened over that in-memory copy.
/// Otherwise the file is handed to <c>TarArchive.Open</c> unchanged.
/// </summary>
/// <param name="fileInfo">File to open.</param>
/// <param name="readerOptions">Optional reader options; a default instance is used when null.</param>
/// <returns>The opened archive.</returns>
/// <remarks>
/// The whole decompressed payload is buffered in memory, so memory use grows with the uncompressed size.
/// NOTE(review): only ArchiveEncoding is carried over to the in-memory archive - confirm this is intended.
/// </remarks>
public IArchive Open(FileInfo fileInfo, ReaderOptions? readerOptions = null)
{
    readerOptions ??= new ReaderOptions();

    var fileName = fileInfo.Name;

    // compressionOptions is null-checked the same way IsArchive does before enumerating it.
    if (compressionOptions != null)
    {
        // Try each compression option
        foreach (var testOption in compressionOptions)
        {
            if (testOption.Type == CompressionType.None)
            {
                continue; // Skip uncompressed
            }

            // Check if file extension matches (ordinal, case-insensitive: independent of culture)
            var extensionMatches = testOption.KnownExtensions.Any(ext =>
                fileName.EndsWith(ext, System.StringComparison.OrdinalIgnoreCase)
            );
            if (!extensionMatches)
            {
                continue;
            }

            // A fresh handle per candidate: a decompressor that closes its input on dispose
            // cannot break the next probe, and the file is only opened when an extension matches.
            using var fileStream = fileInfo.OpenRead();

            // Verify it's the right compression format
            if (!testOption.CanHandle(fileStream))
            {
                continue;
            }

            fileStream.Position = 0;

            var memoryStream = new MemoryStream();
            var ownershipTransferred = false;
            try
            {
                // Decompress the entire file into a seekable MemoryStream
                using (var decompressedStream = testOption.CreateStream(fileStream))
                {
                    decompressedStream.CopyTo(memoryStream);
                }

                memoryStream.Position = 0;

                // Verify it's actually a tar file
                if (TarArchive.IsTarFile(memoryStream))
                {
                    memoryStream.Position = 0;

                    // Return a TarArchive from the decompressed memory stream
                    var options = new ReaderOptions
                    {
                        LeaveStreamOpen = false, // Ensure the MemoryStream is disposed with the archive
                        ArchiveEncoding = readerOptions?.ArchiveEncoding ?? new ArchiveEncoding()
                    };
                    var archive = TarArchive.Open(memoryStream, options);
                    ownershipTransferred = true;
                    return archive;
                }
            }
            finally
            {
                // Release the buffer on every path that does not hand it to an archive,
                // including decompression or header-check failures.
                if (!ownershipTransferred)
                {
                    memoryStream.Dispose();
                }
            }
        }
    }

    // Fall back to normal tar archive opening
    return TarArchive.Open(fileInfo, readerOptions);
}
#endregion

View File

@@ -0,0 +1,63 @@
using System;
using System.IO;
using SharpCompress.Archives;
using Xunit;
namespace SharpCompress.Test;
public class ArchiveFactoryCompressedTarTests : TestBase
{
    // Runs the open attempt, expects InvalidOperationException, and checks that the message
    // names both the detected format and ReaderFactory.
    private static void AssertHelpfulFailure(Action openAttempt, string expectedFormat)
    {
        var failure = Assert.Throws<InvalidOperationException>(openAttempt);

        Assert.Contains(expectedFormat, failure.Message);
        Assert.Contains("ReaderFactory", failure.Message);
    }

    [Fact]
    public void ArchiveFactory_Open_TarBz2_ThrowsHelpfulException()
    {
        var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2");

        AssertHelpfulFailure(
            () =>
            {
                using var archive = ArchiveFactory.Open(archivePath);
            },
            "tar.bz2"
        );
    }

    [Fact]
    public void ArchiveFactory_Open_TarLz_ThrowsHelpfulException()
    {
        var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.lz");

        AssertHelpfulFailure(
            () =>
            {
                using var archive = ArchiveFactory.Open(archivePath);
            },
            "tar.lz"
        );
    }

    [Fact]
    public void ArchiveFactory_Open_TarBz2Stream_ThrowsHelpfulException()
    {
        var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2");
        using var input = File.OpenRead(archivePath);

        AssertHelpfulFailure(
            () =>
            {
                using var archive = ArchiveFactory.Open(input);
            },
            "tar.bz2"
        );
    }

    [Fact]
    public void ArchiveFactory_Open_TarLzStream_ThrowsHelpfulException()
    {
        var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.lz");
        using var input = File.OpenRead(archivePath);

        AssertHelpfulFailure(
            () =>
            {
                using var archive = ArchiveFactory.Open(input);
            },
            "tar.lz"
        );
    }
}