From 335db1eb9eccc9cb101f2d4a115ef0adb20ee792 Mon Sep 17 00:00:00 2001 From: Adam Hathcock Date: Mon, 26 Jan 2026 18:10:59 +0000 Subject: [PATCH] fix ValueTask struct copying --- AGENTS.md | 55 +++++++++++++++++++ .../LZMA/RangeCoder/RangeCoderBit.Async.cs | 19 ++++--- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 35e77d70..d3f4b598 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -179,3 +179,58 @@ SharpCompress supports multiple archive and compression formats: 3. **Stream disposal** - Always set `LeaveStreamOpen` explicitly when needed (default is to close) 4. **Tar + non-seekable stream** - Must provide file size or it will throw 6. **Format detection** - Use `ReaderFactory.Open()` for auto-detection, test with actual archive files + +### Async Struct-Copy Bug in LZMA RangeCoder + +When implementing async methods on mutable `struct` types (like `BitEncoder` and `BitDecoder` in the LZMA RangeCoder), be aware that the async state machine copies the struct when `await` is encountered. This means mutations to struct fields after the `await` point may not persist back to the original struct stored in arrays or fields. + +**The Bug:** +```csharp +// BAD: async method on mutable struct +public async ValueTask DecodeAsync(Decoder decoder, CancellationToken cancellationToken = default) +{ + var newBound = (decoder._range >> K_NUM_BIT_MODEL_TOTAL_BITS) * _prob; + if (decoder._code < newBound) + { + decoder._range = newBound; + _prob += (K_BIT_MODEL_TOTAL - _prob) >> K_NUM_MOVE_BITS; // Mutates _prob + await decoder.Normalize2Async(cancellationToken).ConfigureAwait(false); // Struct gets copied here + return 0; // Original _prob update may be lost + } + // ... +} +``` + +**The Fix:** +Refactor async methods on mutable structs to perform all struct mutations synchronously before any `await`, or use a helper method to separate the await from the struct mutation: + +```csharp +// GOOD: struct mutations happen synchronously, await is conditional +public ValueTask DecodeAsync(Decoder decoder, CancellationToken cancellationToken = default) +{ + var newBound = (decoder._range >> K_NUM_BIT_MODEL_TOTAL_BITS) * _prob; + if (decoder._code < newBound) + { + decoder._range = newBound; + _prob += (K_BIT_MODEL_TOTAL - _prob) >> K_NUM_MOVE_BITS; // All mutations complete + return DecodeAsyncHelper(decoder.Normalize2Async(cancellationToken), 0); // Await in helper + } + decoder._range -= newBound; + decoder._code -= newBound; + _prob -= (_prob) >> K_NUM_MOVE_BITS; // All mutations complete + return DecodeAsyncHelper(decoder.Normalize2Async(cancellationToken), 1); // Await in helper +} + +private static async ValueTask DecodeAsyncHelper(ValueTask normalizeTask, uint result) +{ + await normalizeTask.ConfigureAwait(false); + return result; +} +``` + +**Why This Matters:** +In LZMA, the `BitEncoder` and `BitDecoder` structs maintain adaptive probability models in their `_prob` field. When these structs are stored in arrays (e.g., `_models[m]`), the async state machine copy breaks the adaptive model, causing incorrect bit decoding and eventually `DataErrorException` exceptions. + +**Related Files:** +- `src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.Async.cs` - Fixed +- `src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBitTree.Async.cs` - Uses readonly structs, so this pattern doesn't apply diff --git a/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.Async.cs b/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.Async.cs index a33dfa02..8de17dd9 100644 --- a/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.Async.cs +++ b/src/SharpCompress/Compressors/LZMA/RangeCoder/RangeCoderBit.Async.cs @@ -7,7 +7,7 @@ namespace SharpCompress.Compressors.LZMA.RangeCoder; internal partial struct BitEncoder { - public async ValueTask EncodeAsync( + public ValueTask EncodeAsync( Encoder encoder, uint symbol, CancellationToken cancellationToken = default @@ -28,14 +28,15 @@ internal partial struct BitEncoder if (encoder._range < Encoder.K_TOP_VALUE) { encoder._range <<= 8; - await encoder.ShiftLowAsync(cancellationToken).ConfigureAwait(false); + return encoder.ShiftLowAsync(cancellationToken); } + return default; } } internal partial struct BitDecoder { - public async ValueTask DecodeAsync( + public ValueTask DecodeAsync( Decoder decoder, CancellationToken cancellationToken = default ) @@ -45,13 +46,17 @@ internal partial struct BitDecoder { decoder._range = newBound; _prob += (K_BIT_MODEL_TOTAL - _prob) >> K_NUM_MOVE_BITS; - await decoder.Normalize2Async(cancellationToken).ConfigureAwait(false); - return 0; + return DecodeAsyncHelper(decoder.Normalize2Async(cancellationToken), 0); } decoder._range -= newBound; decoder._code -= newBound; _prob -= (_prob) >> K_NUM_MOVE_BITS; - await decoder.Normalize2Async(cancellationToken).ConfigureAwait(false); - return 1; + return DecodeAsyncHelper(decoder.Normalize2Async(cancellationToken), 1); + } + + private static async ValueTask DecodeAsyncHelper(ValueTask normalizeTask, uint result) + { + await normalizeTask.ConfigureAwait(false); + return result; } }