Compare commits

..

1 Commits

Author SHA1 Message Date
Evgenii Kliuchnikov
01c3f685f3 Add py free-threaded support + concurrent access test
PiperOrigin-RevId: 830892275
2025-11-21 04:54:39 -08:00
20 changed files with 580 additions and 348 deletions

View File

@@ -133,11 +133,22 @@ jobs:
c_compiler: clang
cxx_compiler: clang++
- name: python3.10-win
- name: python3.14:clang
build_system: python
python_version: "3.10"
# TODO: investigate why win-builds can't run tests
os: windows-2022
python_version: "3.14"
c_compiler: clang
cxx_compiler: clang++
- name: python3.14t:clang
build_system: python
python_version: "3.14t"
c_compiler: clang
cxx_compiler: clang++
- name: python3.14-win
build_system: python
python_version: "3.14"
os: windows-latest
- name: maven
build_system: maven
@@ -220,7 +231,7 @@ jobs:
sudo apt install -y ${EXTRA_PACKAGES}
- name: Checkout the source
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
submodules: false
fetch-depth: 1
@@ -321,7 +332,7 @@ jobs:
# cd integration
# mvn -B verify
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
if: ${{ matrix.build_system == 'python' }}
with:
python-version: ${{ matrix.python_version }}
@@ -331,8 +342,7 @@ jobs:
if: ${{ matrix.build_system == 'python' }}
run: |
python -VV
python -c "import sys; sys.exit('Invalid python version') if '.'.join(map(str,sys.version_info[0:2])) != '${{ matrix.python_version }}' else True"
pip install setuptools==51.3.3 pytest
pip install "setuptools>=70.0.0" pytest
python setup.py build_ext --inplace
pytest ./python/tests
@@ -342,7 +352,7 @@ jobs:
steps:
- name: Checkout the source
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
submodules: false
fetch-depth: 1

View File

@@ -39,7 +39,7 @@ jobs:
with:
egress-policy: audit
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
submodules: true
fetch-depth: 1

View File

@@ -40,7 +40,7 @@ jobs:
egress-policy: audit
- name: Checkout repository
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
@@ -71,6 +71,8 @@ jobs:
- if: matrix.language == 'cpp' || matrix.language == 'python'
name: Build Python
run: |
python -VV
pip install "setuptools>=70.0.0"
python setup.py build_ext
- name: Perform CodeQL Analysis

View File

@@ -35,7 +35,7 @@ jobs:
egress-policy: audit
- name: Checkout repository
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Install tools
run: |
@@ -50,6 +50,6 @@ jobs:
- name: Lint Python code
run: |
eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)"
ruff check --extend-select=C4,C90,PERF,RET,SIM,W
ruff check
# TODO(eustas): run buildifier

View File

@@ -69,7 +69,7 @@ jobs:
egress-policy: audit
- name: Checkout the source
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
submodules: false
fetch-depth: 1
@@ -150,7 +150,7 @@ jobs:
egress-policy: audit
- name: Checkout the source
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
submodules: false
fetch-depth: 1
@@ -176,7 +176,7 @@ jobs:
steps:
- name: Checkout the source
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
submodules: false
fetch-depth: 1
@@ -208,7 +208,7 @@ jobs:
egress-policy: audit
- name: Checkout the source
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
submodules: false
fetch-depth: 1

View File

@@ -42,7 +42,7 @@ jobs:
egress-policy: audit
- name: "Checkout code"
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
persist-credentials: false

View File

@@ -433,7 +433,7 @@ static size_t UpdateNodes(
const CompoundDictionary* addon = &params->dictionary.compound;
size_t gap = addon->total_size;
BROTLI_DCHECK(cur_ix_masked + max_len <= ringbuffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_len <= ringbuffer_mask);
EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
starting_dist_cache, model, queue, nodes);

View File

@@ -545,7 +545,7 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch(
source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
}
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
for (i = 0; i < 4; ++i) {
const size_t distance = (size_t)distance_cache[i];
@@ -656,7 +656,7 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
}
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
while (item == 0) {
size_t offset;

View File

@@ -213,7 +213,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
out->len = 0;
out->len_code_delta = 0;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
/* Try last distance first. */
for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {

View File

@@ -178,7 +178,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
out->len = 0;
out->len_code_delta = 0;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
/* Try last distance first. */
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {

View File

@@ -195,7 +195,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
out->len = 0;
out->len_code_delta = 0;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
/* Try last distance first. */
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {

View File

@@ -178,7 +178,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
out->len = 0;
out->len_code_delta = 0;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
/* Try last distance first. */
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {

View File

@@ -165,7 +165,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
size_t cached_backward = (size_t)distance_cache[0];
size_t prev_ix = cur_ix - cached_backward;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
out->len_code_delta = 0;
if (prev_ix < cur_ix) {

View File

@@ -170,7 +170,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
out->len = 0;
out->len_code_delta = 0;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask + 1);
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
/* Try last distance first. */
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {

View File

@@ -22,6 +22,28 @@
#error "Only Python 3.10+ is supported"
#endif
/*
   Neither the decoder nor the encoder supports concurrent access. An attempt
   to enter an instance that is already in use results in an exception.

   "Critical" sections are used in prologues to ensure that only one thread
   enters. For consistency, they are used in epilogues as well. "Critical" is
   essential for free-threaded builds. In a GIL environment the macros are
   rendered as a plain scope (i.e. `{` and `}`).

   NB: `Py_BEGIN_ALLOW_THREADS` / `Py_END_ALLOW_THREADS` are still required to
   unblock the stop-the-world GC.

   Usage notes:
    - BROTLI_CRITICAL_START and BROTLI_CRITICAL_END open and close a scope, so
      they must be used as a matched pair inside the same function;
    - BROTLI_CRITICAL_START refers to a variable named `self`, which must be
      in scope at the point of use.
*/
#ifdef Py_GIL_DISABLED
#if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 13)
#error "Critical sections are only available in Python 3.13+"
#endif
/* Free-threaded build: enter / leave a critical section on `self`. */
#define BROTLI_CRITICAL_START Py_BEGIN_CRITICAL_SECTION(self)
#define BROTLI_CRITICAL_END Py_END_CRITICAL_SECTION()
#else
/* GIL build: no locking, the macros only delimit a scope. */
#define BROTLI_CRITICAL_START {
#define BROTLI_CRITICAL_END }
#endif
static const char kErrorAttr[] = "error";
static const char kModuleAttr[] = "_module";
@@ -449,6 +471,33 @@ static void brotli_Compressor_dealloc(PyBrotli_Compressor* self) {
Py_TYPE(self)->tp_free((PyObject*)self);
}
/*
   Tries to claim exclusive use of the compressor for the calling thread.

   Returns 1 and sets `self->processing` when the instance is healthy and
   idle. Otherwise returns 0 after reporting the reason through
   `set_brotli_exception`: the "unhealthy" error takes precedence over the
   "concurrent" one.

   The function has a single exit point on purpose: the critical section
   opened by BROTLI_CRITICAL_START must always be closed by
   BROTLI_CRITICAL_END.
*/
static int brotli_compressor_enter(PyBrotli_Compressor* self) {
  PyObject* self_type = (PyObject*)Py_TYPE((PyObject*)self);
  int entered = 0;

  BROTLI_CRITICAL_START;
  if (self->healthy == 0) {
    set_brotli_exception(self_type, kCompressUnhealthyError);
  } else if (self->processing != 0) {
    set_brotli_exception(self_type, kCompressConcurrentError);
  } else {
    /* Healthy and idle: mark the instance as busy. */
    self->processing = 1;
    entered = 1;
  }
  BROTLI_CRITICAL_END;

  return entered;
}
/*
   Releases the compressor claimed by a successful `brotli_compressor_enter`.

   Clears `self->processing` inside the critical section. The assertion
   documents the expectation that the flag is still set, i.e. that this call
   is paired with a preceding successful "enter".
*/
static void brotli_compressor_leave(PyBrotli_Compressor* self) {
  BROTLI_CRITICAL_START;
  assert(self->processing == 1);
  self->processing = 0;
  BROTLI_CRITICAL_END;
}
/*
Compress "utility knife" used for process / flush / finish.
@@ -522,28 +571,27 @@ static PyObject* brotli_Compressor_process(PyBrotli_Compressor* self,
PyObject* ret = NULL;
PyObject* input_object = NULL;
Py_buffer input;
int ok = 1;
if (self->healthy == 0) {
set_brotli_exception(self_type, kCompressUnhealthyError);
return NULL;
}
if (self->processing != 0) {
set_brotli_exception(self_type, kCompressConcurrentError);
return NULL;
}
if (!brotli_compressor_enter(self)) return NULL;
if (!PyArg_ParseTuple(args, "O:process", &input_object)) {
return NULL;
ok = 0;
}
if (!get_data_view(input_object, &input)) {
if (ok && !get_data_view(input_object, &input)) {
ok = 0;
}
if (!ok) {
self->healthy = 0;
brotli_compressor_leave(self);
return NULL;
}
self->processing = 1;
ret = compress_stream(self, BROTLI_OPERATION_PROCESS, (uint8_t*)input.buf,
input.len);
PyBuffer_Release(&input);
self->processing = 0;
brotli_compressor_leave(self);
return ret;
}
@@ -551,18 +599,10 @@ static PyObject* brotli_Compressor_flush(PyBrotli_Compressor* self) {
PyObject* self_type = (PyObject*)Py_TYPE((PyObject*)self);
PyObject* ret = NULL;
if (self->healthy == 0) {
set_brotli_exception(self_type, kCompressUnhealthyError);
return NULL;
}
if (self->processing != 0) {
set_brotli_exception(self_type, kCompressConcurrentError);
return NULL;
}
self->processing = 1;
if (!brotli_compressor_enter(self)) return NULL;
ret = compress_stream(self, BROTLI_OPERATION_FLUSH, NULL, 0);
self->processing = 0;
brotli_compressor_leave(self);
return ret;
}
@@ -570,18 +610,10 @@ static PyObject* brotli_Compressor_finish(PyBrotli_Compressor* self) {
PyObject* self_type = (PyObject*)Py_TYPE((PyObject*)self);
PyObject* ret = NULL;
if (self->healthy == 0) {
set_brotli_exception(self_type, kCompressUnhealthyError);
return NULL;
}
if (self->processing != 0) {
set_brotli_exception(self_type, kCompressConcurrentError);
return NULL;
}
self->processing = 1;
if (!brotli_compressor_enter(self)) return NULL;
ret = compress_stream(self, BROTLI_OPERATION_FINISH, NULL, 0);
self->processing = 0;
brotli_compressor_leave(self);
if (ret != NULL) {
assert(BrotliEncoderIsFinished(self->enc));
}
@@ -639,6 +671,33 @@ static int brotli_Decompressor_init(PyBrotli_Decompressor* self, PyObject* args,
return 0;
}
/*
   Tries to claim exclusive use of the decompressor for the calling thread.

   Returns 1 and sets `self->processing` when the instance is healthy and
   idle. Otherwise returns 0 after reporting the reason through
   `set_brotli_exception`: the "unhealthy" error takes precedence over the
   "concurrent" one.

   The function has a single exit point on purpose: the critical section
   opened by BROTLI_CRITICAL_START must always be closed by
   BROTLI_CRITICAL_END.
*/
static int brotli_decompressor_enter(PyBrotli_Decompressor* self) {
  PyObject* self_type = (PyObject*)Py_TYPE((PyObject*)self);
  int entered = 0;

  BROTLI_CRITICAL_START;
  if (self->healthy == 0) {
    set_brotli_exception(self_type, kDecompressUnhealthyError);
  } else if (self->processing != 0) {
    set_brotli_exception(self_type, kDecompressConcurrentError);
  } else {
    /* Healthy and idle: mark the instance as busy. */
    self->processing = 1;
    entered = 1;
  }
  BROTLI_CRITICAL_END;

  return entered;
}
/*
   Releases the decompressor claimed by a successful
   `brotli_decompressor_enter`.

   Clears `self->processing` inside the critical section. The assertion
   documents the expectation that the flag is still set, i.e. that this call
   is paired with a preceding successful "enter".
*/
static void brotli_decompressor_leave(PyBrotli_Decompressor* self) {
  BROTLI_CRITICAL_START;
  assert(self->processing == 1);
  self->processing = 0;
  BROTLI_CRITICAL_END;
}
static void brotli_Decompressor_dealloc(PyBrotli_Decompressor* self) {
if (self->dec) BrotliDecoderDestroyInstance(self->dec);
if (self->unconsumed_data) {
@@ -664,26 +723,24 @@ static PyObject* brotli_Decompressor_process(PyBrotli_Decompressor* self,
uint8_t* new_tail = NULL;
size_t new_tail_length = 0;
int oom = 0;
int ok = 1;
if (self->healthy == 0) {
set_brotli_exception(self_type, kDecompressUnhealthyError);
return NULL;
}
if (self->processing != 0) {
set_brotli_exception(self_type, kDecompressConcurrentError);
return NULL;
}
if (!brotli_decompressor_enter(self)) return NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|n:process", (char**)kwlist,
&input_object, &output_buffer_limit)) {
return NULL;
ok = 0;
}
if (!get_data_view(input_object, &input)) {
if (ok && !get_data_view(input_object, &input)) {
ok = 0;
}
if (!ok) {
self->healthy = 0;
brotli_decompressor_leave(self);
return NULL;
}
Buffer_Init(&buffer);
self->processing = 1;
if (self->unconsumed_data_length > 0) {
if (input.len > 0) {
@@ -769,21 +826,17 @@ finally:
assert(ret == NULL);
self->healthy = 0;
}
self->processing = 0;
brotli_decompressor_leave(self);
return ret;
}
static PyObject* brotli_Decompressor_is_finished(PyBrotli_Decompressor* self) {
PyObject* self_type = (PyObject*)Py_TYPE((PyObject*)self);
if (self->healthy == 0) {
set_brotli_exception(self_type, kDecompressUnhealthyError);
return NULL;
}
if (self->processing != 0) {
set_brotli_exception(self_type, kDecompressConcurrentError);
return NULL;
}
if (BrotliDecoderIsFinished(self->dec)) {
int result;
if (!brotli_decompressor_enter(self)) return NULL;
result = BrotliDecoderIsFinished(self->dec);
brotli_decompressor_leave(self);
if (result) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
@@ -792,16 +845,11 @@ static PyObject* brotli_Decompressor_is_finished(PyBrotli_Decompressor* self) {
static PyObject* brotli_Decompressor_can_accept_more_data(
PyBrotli_Decompressor* self) {
PyObject* self_type = (PyObject*)Py_TYPE((PyObject*)self);
if (self->healthy == 0) {
set_brotli_exception(self_type, kDecompressUnhealthyError);
return NULL;
}
if (self->processing != 0) {
set_brotli_exception(self_type, kDecompressConcurrentError);
return NULL;
}
if (self->unconsumed_data_length > 0) {
int result;
if (!brotli_decompressor_enter(self)) return NULL;
result = (self->unconsumed_data_length > 0);
brotli_decompressor_leave(self);
if (result) {
Py_RETURN_FALSE;
} else {
Py_RETURN_TRUE;
@@ -1003,7 +1051,9 @@ static PyMethodDef brotli_methods[] = {
static PyModuleDef_Slot brotli_mod_slots[] = {
{Py_mod_exec, brotli_init_mod},
#if (PY_MAJOR_VERSION > 3) || (PY_MINOR_VERSION >= 12)
#ifdef Py_GIL_DISABLED
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
#elif (PY_MAJOR_VERSION > 3) || (PY_MINOR_VERSION >= 12)
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
#endif
{0, NULL}};

View File

@@ -3,6 +3,11 @@
# Distributed under MIT license.
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
import queue
import random
import threading
import time
import brotli
import pytest
@@ -27,7 +32,7 @@ def test_multiple_process(quality, text_name):
chunk_size = 2048
chunks = _test_utils.chunk_input(original, chunk_size)
compressor = brotli.Compressor(quality=quality)
compressed = b''
compressed = b""
for chunk in chunks:
compressed += compressor.process(chunk)
compressed += compressor.finish()
@@ -42,10 +47,104 @@ def test_multiple_process_and_flush(quality, text_name):
chunk_size = 2048
chunks = _test_utils.chunk_input(original, chunk_size)
compressor = brotli.Compressor(quality=quality)
compressed = b''
compressed = b""
for chunk in chunks:
compressed += compressor.process(chunk)
compressed += compressor.flush()
compressed += compressor.finish()
decompressed = brotli.decompress(compressed)
assert original == decompressed
def make_input(size):
    """Build a deterministic pseudo-text payload of at least `size` bytes.

    A generator seeded with a fixed value first creates a vocabulary of
    unique words. Every word is one capital ASCII letter followed by
    lowercase ASCII letters, and word lengths range from 1 to 7. Words are
    then drawn at random and concatenated until the accumulated length
    reaches `size`, so the result can overshoot `size` by up to 6 bytes.

    Args:
        size: minimal length of the produced payload, in bytes.

    Returns:
        The generated payload as `bytes`; identical for identical `size`.
    """
    lowercase = [bytes([code]) for code in b"abcdefghijklmnopqrstuvwxyz"]
    uppercase = [bytes([code]) for code in b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
    # Vocabulary size for each word length; index 0 is an unused placeholder.
    num_words_by_len = [0, 25, 100, 175, 1700, 1000, 1000, 1000]

    rng = random.Random()
    rng.seed(2025)

    seen = set()
    words_by_len = [[]]
    for word_len, quota in enumerate(num_words_by_len):
        if word_len == 0:
            continue
        bucket = []
        while len(bucket) < quota:
            # Capital letter is drawn first, then the lowercase tail.
            letters = [rng.choice(uppercase)]
            letters.extend(rng.choice(lowercase) for _ in range(word_len - 1))
            candidate = b"".join(letters)
            if candidate in seen:
                # Duplicate: draw again, words must be unique.
                continue
            seen.add(candidate)
            bucket.append(candidate)
        words_by_len.append(bucket)

    lengths = range(1, len(num_words_by_len))
    pieces = []
    produced = 0
    while produced < size:
        # The word length is drawn before the word itself.
        picked = rng.choice(words_by_len[rng.choice(lengths)])
        produced += len(picked)
        pieces.append(picked)
    return b"".join(pieces)
def _thread_compress(original, compressor, results):
compressed = compressor.process(original)
compressed += compressor.finish()
results.put(1)
def _thread_concurrent_process(compressor, results):
time.sleep(0.01)
try:
_ = compressor.process(b"whatever")
except brotli.error:
results.put(2)
def _thread_concurrent_flush(compressor, results):
time.sleep(0.02)
try:
_ = compressor.flush()
except brotli.error:
results.put(3)
def _thread_concurrent_finish(compressor, results):
time.sleep(0.03)
try:
_ = compressor.finish()
except brotli.error:
results.put(4)
def test_concurrency():
    """Concurrent calls on a busy compressor must be rejected.

    One thread compresses a 2 MiB payload while three others call `process`,
    `flush` and `finish` on the same compressor after short delays. The test
    expects the main thread to finish (marker 1) and every intruder to get
    `brotli.error` (markers 2, 3 and 4).

    NOTE(review): the outcome relies on the compression still being in
    progress when the delayed threads wake up (10-30 ms) - confirm this holds
    on fast machines.
    """
    original = make_input(2 * 1024 * 1024)
    compressor = brotli.Compressor(quality=9)
    results = queue.Queue()

    workers = [
        threading.Thread(
            target=_thread_compress, args=(original, compressor, results)
        ),
        threading.Thread(
            target=_thread_concurrent_process, args=(compressor, results)
        ),
        threading.Thread(
            target=_thread_concurrent_flush, args=(compressor, results)
        ),
        threading.Thread(
            target=_thread_concurrent_finish, args=(compressor, results)
        ),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # All threads are joined, so the queue content is final.
    assert sorted(results.queue) == [1, 2, 3, 4]

View File

@@ -3,6 +3,10 @@
# Distributed under MIT license.
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
import queue
import threading
import time
import brotli
import pytest
@@ -89,3 +93,76 @@ def test_already_finished():
decompressor.process(brotli.compress(b'a'))
with pytest.raises(brotli.error):
decompressor.process(b'a')
def make_input(size):
    """Assemble a compressed payload of roughly `size` bytes.

    Three pieces are produced by a quality-1 compressor: a prologue (input
    `b'b'` followed by a flush), a filler (input `b'c'` followed by a flush)
    and an epilogue (the output of `finish`). The filler is repeated
    `size // len(filler)` times between the prologue and the epilogue.

    NOTE(review): presumably the repeated filler keeps the stream decodable -
    the decompression test relies on it; confirm against the format.
    """
    compressor = brotli.Compressor(quality=1)
    prologue = compressor.process(b'b') + compressor.flush()
    filler = compressor.process(b'c') + compressor.flush()
    epilogue = compressor.finish()
    repeats = size // len(filler)
    return prologue + filler * repeats + epilogue
def _thread_decompress(compressed, decompressor, results):
_ = decompressor.process(compressed)
if decompressor.is_finished():
results.put(1)
def _thread_concurrent_process(decompressor, results):
time.sleep(0.01)
try:
_ = decompressor.process(b'')
except brotli.error:
results.put(2)
def _thread_concurrent_can_accept_more_data(decompressor, results):
time.sleep(0.02)
try:
_ = decompressor.can_accept_more_data()
except brotli.error:
results.put(3)
def _thread_concurrent_is_finished(decompressor, results):
time.sleep(0.03)
try:
_ = decompressor.is_finished()
except brotli.error:
results.put(4)
def test_concurrency():
    """Concurrent calls on a busy decompressor must be rejected.

    One thread decompresses a payload built by `make_input(16 MiB)` while
    three others call `process`, `can_accept_more_data` and `is_finished` on
    the same decompressor after short delays. The test expects the main
    thread to finish the stream (marker 1) and every intruder to get
    `brotli.error` (markers 2, 3 and 4).

    NOTE(review): the outcome relies on the decompression still being in
    progress when the delayed threads wake up (10-30 ms) - confirm this holds
    on fast machines.
    """
    compressed = make_input(16 * 1024 * 1024)
    decompressor = brotli.Decompressor()
    results = queue.Queue()

    workers = [
        threading.Thread(
            target=_thread_decompress, args=(compressed, decompressor, results)
        ),
        threading.Thread(
            target=_thread_concurrent_process, args=(decompressor, results)
        ),
        threading.Thread(
            target=_thread_concurrent_can_accept_more_data,
            args=(decompressor, results),
        ),
        threading.Thread(
            target=_thread_concurrent_is_finished, args=(decompressor, results)
        ),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # All threads are joined, so the queue content is final.
    assert sorted(results.queue) == [1, 2, 3, 4]

View File

@@ -18,11 +18,13 @@ for line in lines:
if appendix_a_found:
if re_data_line.match(line) is not None:
data = line.strip()
dictionary.extend(int(data[2 * i:2 * i + 2], 16) for i in range(32))
for i in range(32):
dictionary.append(int(data[2 * i:2 * i + 2], 16))
if len(dictionary) == 122784:
break
elif line.startswith("Appendix A."):
appendix_a_found = True
else:
if line.startswith("Appendix A."):
appendix_a_found = True
bin_path = "dictionary.bin"

View File

@@ -40,12 +40,13 @@ for b in data:
is_skip = False
hi.append(unichr(cntr))
cntr = skip_flip_offset + 1
elif value >= 0x80:
cntr += 1
else:
is_skip = True
hi.append(unichr(cntr))
cntr = skip_flip_offset + 1
if value >= 0x80:
cntr += 1
else:
is_skip = True
hi.append(unichr(cntr))
cntr = skip_flip_offset + 1
hi.append(unichr(cntr))
low0 = low[0:len(low) // 2]
@@ -55,15 +56,15 @@ low1 = low[len(low) // 2:len(low)]
def escape(chars):
result = []
for c in chars:
if c == "\r":
if "\r" == c:
result.append("\\r")
elif c == "\n":
elif "\n" == c:
result.append("\\n")
elif c == "\t":
elif "\t" == c:
result.append("\\t")
elif c == "\"":
elif "\"" == c:
result.append("\\\"")
elif c == "\\":
elif "\\" == c:
result.append("\\\\")
elif ord(c) < 32 or ord(c) >= 127:
result.append("\\u%04X" % ord(c))

469
setup.py
View File

@@ -3,131 +3,126 @@
# Distributed under MIT license.
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
"""This script is used for building and packaging the Brotli extension."""
import logging
import os
import re
import unittest
from setuptools import Extension
from setuptools import setup
from distutils.command.build_ext import build_ext
from distutils import errors
from distutils import dep_util
from distutils import log
import setuptools
import setuptools.command.build_ext as build_ext
import setuptools.errors as errors
import setuptools.modified as modified
CURR_DIR = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
LOGGER = logging.getLogger(__name__)
def bool_from_environ(key):
value = os.environ.get(key)
if not value:
return False
if value == "1":
return True
if value == "0":
return False
raise ValueError("Environment variable {} has invalid value {}. Please set it to 1, 0 or an empty string".format(key, value))
def read_define(path, macro):
"""Return macro value from the given file."""
with open(path, "r") as f:
for line in f:
m = re.match(r"#define\s{}\s+(.+)".format(macro), line)
if m:
return m.group(1)
return ""
value = os.environ.get(key)
if not value:
return False
if value == "1":
return True
if value == "0":
return False
raise ValueError(
"Environment variable {} has invalid value {}. Please set it to 1, 0 or"
" an empty string".format(key, value)
)
def get_version():
"""Return library version string from 'common/version.h' file."""
version_file_path = os.path.join(CURR_DIR, "c", "common", "version.h")
major = read_define(version_file_path, "BROTLI_VERSION_MAJOR")
minor = read_define(version_file_path, "BROTLI_VERSION_MINOR")
patch = read_define(version_file_path, "BROTLI_VERSION_PATCH")
if not major or not minor or not patch:
return ""
return "{}.{}.{}".format(major, minor, patch)
"""Return library version string from 'common/version.h' file."""
version_file_path = os.path.join(CURR_DIR, "c", "common", "version.h")
defs = {}
with open(version_file_path, "r") as file:
for line in file:
m = re.match(r"#define\s+(\w+)\s+(\d+)", line)
if m:
defs[m.group(1)] = m.group(2)
parts = ["MAJOR", "MINOR", "PATCH"]
major, minor, patch = [defs.get("BROTLI_VERSION_" + key) for key in parts]
if not major or not minor or not patch:
return ""
return "{}.{}.{}".format(major, minor, patch)
def get_test_suite():
test_loader = unittest.TestLoader()
return test_loader.discover("python", pattern="*_test.py")
class BuildExt(build_ext.build_ext):
"""Customized build_ext command to handle Brotli extension building."""
def get_source_files(self):
filenames = super().get_source_files()
for ext in self.extensions:
filenames.extend(ext.depends)
return filenames
class BuildExt(build_ext):
def get_source_files(self):
filenames = build_ext.get_source_files(self)
for ext in self.extensions:
filenames.extend(ext.depends)
return filenames
def build_extension(self, ext):
if ext.sources is None or not isinstance(ext.sources, (list, tuple)):
raise errors.DistutilsSetupError(
"in 'ext_modules' option (extension '%s'), "
"'sources' must be present and must be "
"a list of source filenames"
% ext.name
)
def build_extension(self, ext):
if ext.sources is None or not isinstance(ext.sources, (list, tuple)):
raise errors.DistutilsSetupError(
"in 'ext_modules' option (extension '%s'), "
"'sources' must be present and must be "
"a list of source filenames" % ext.name
)
ext_path = self.get_ext_fullpath(ext.name)
depends = ext.sources + ext.depends
is_outdated = modified.newer_group(depends, ext_path, "newer")
if self.force or is_outdated:
LOGGER.info("building '%s' extension", ext.name)
else:
LOGGER.debug("skipping '%s' extension (up-to-date)", ext.name)
return
ext_path = self.get_ext_fullpath(ext.name)
depends = ext.sources + ext.depends
if not (self.force or dep_util.newer_group(depends, ext_path, "newer")):
log.debug("skipping '%s' extension (up-to-date)", ext.name)
return
log.info("building '%s' extension", ext.name)
c_sources = []
for source in ext.sources:
if source.endswith(".c"):
c_sources.append(source)
extra_args = ext.extra_compile_args or []
c_sources = [source for source in ext.sources if source.endswith(".c")]
extra_args = ext.extra_compile_args or []
objects = []
objects = []
macros = ext.define_macros[:]
for undef in ext.undef_macros:
macros.append((undef,))
macros = ext.define_macros[:]
if self.compiler.compiler_type == "mingw32":
# On Windows Python 2.7, pyconfig.h defines "hypot" as "_hypot",
# This clashes with GCC's cmath, and causes compilation errors when
# building under MinGW: http://bugs.python.org/issue11566
macros.append(("_hypot", "hypot"))
for undef in ext.undef_macros:
macros.append((undef,))
objs = self.compiler.compile(
c_sources,
output_dir=self.build_temp,
macros=macros,
include_dirs=ext.include_dirs,
debug=self.debug,
extra_postargs=extra_args,
depends=ext.depends,
)
objects.extend(objs)
objs = self.compiler.compile(
c_sources,
output_dir=self.build_temp,
macros=macros,
include_dirs=ext.include_dirs,
debug=self.debug,
extra_postargs=extra_args,
depends=ext.depends,
)
objects.extend(objs)
self._built_objects = objects[:]
if ext.extra_objects:
objects.extend(ext.extra_objects)
extra_args = ext.extra_link_args or []
# When using GCC on Windows, we statically link libgcc and libstdc++,
# so that we don't need to package extra DLLs.
if self.compiler.compiler_type == "mingw32":
extra_args.extend(["-static-libgcc", "-static-libstdc++"])
self._built_objects = objects[:]
if ext.extra_objects:
objects.extend(ext.extra_objects)
extra_args = ext.extra_link_args or []
# when using GCC on Windows, we statically link libgcc and libstdc++,
# so that we don't need to package extra DLLs
if self.compiler.compiler_type == "mingw32":
extra_args.extend(["-static-libgcc", "-static-libstdc++"])
ext_path = self.get_ext_fullpath(ext.name)
# Detect target language, if not provided.
language = ext.language or self.compiler.detect_language(c_sources)
ext_path = self.get_ext_fullpath(ext.name)
# Detect target language, if not provided
language = ext.language or self.compiler.detect_language(c_sources)
self.compiler.link_shared_object(
objects,
ext_path,
libraries=self.get_libraries(ext),
library_dirs=ext.library_dirs,
runtime_library_dirs=ext.runtime_library_dirs,
extra_postargs=extra_args,
export_symbols=self.get_export_symbols(ext),
debug=self.debug,
build_temp=self.build_temp,
target_lang=language,
)
self.compiler.link_shared_object(
objects,
ext_path,
libraries=self.get_libraries(ext),
library_dirs=ext.library_dirs,
runtime_library_dirs=ext.runtime_library_dirs,
extra_postargs=extra_args,
export_symbols=self.get_export_symbols(ext),
debug=self.debug,
build_temp=self.build_temp,
target_lang=language,
)
NAME = "brotli"
@@ -148,8 +143,6 @@ CLASSIFIERS = [
"Development Status :: 4 - Beta",
"Environment :: Console",
"Intended Audience :: Developers",
# Deprecated, see https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#license for details.
# "License :: OSI Approved :: MIT License",
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows",
"Operating System :: POSIX :: Linux",
@@ -178,154 +171,153 @@ PY_MODULES = ["brotli"]
USE_SYSTEM_BROTLI = bool_from_environ("USE_SYSTEM_BROTLI")
if USE_SYSTEM_BROTLI:
import pkgconfig
import pkgconfig
REQUIRED_BROTLI_SYSTEM_LIBRARIES = ["libbrotlicommon", "libbrotlienc", "libbrotlidec"]
REQUIRED_BROTLI_SYSTEM_LIBRARIES = [
"libbrotlicommon",
"libbrotlienc",
"libbrotlidec",
]
define_macros = []
include_dirs = []
libraries = []
library_dirs = []
define_macros = []
include_dirs = []
libraries = []
library_dirs = []
for required_system_library in REQUIRED_BROTLI_SYSTEM_LIBRARIES:
package_configuration = pkgconfig.parse(required_system_library)
for required_system_library in REQUIRED_BROTLI_SYSTEM_LIBRARIES:
package_configuration = pkgconfig.parse(required_system_library)
define_macros += package_configuration["define_macros"]
include_dirs += package_configuration["include_dirs"]
libraries += package_configuration["libraries"]
library_dirs += package_configuration["library_dirs"]
define_macros += package_configuration["define_macros"]
include_dirs += package_configuration["include_dirs"]
libraries += package_configuration["libraries"]
library_dirs += package_configuration["library_dirs"]
brotli_extension = Extension(
'_brotli',
sources=[
'python/_brotli.c'
],
include_dirs=include_dirs,
define_macros=define_macros,
libraries=libraries,
library_dirs=library_dirs
)
brotli_extension = setuptools.Extension(
"_brotli",
sources=["python/_brotli.c"],
include_dirs=include_dirs,
define_macros=define_macros,
libraries=libraries,
library_dirs=library_dirs,
)
EXT_MODULES = [brotli_extension]
EXT_MODULES = [brotli_extension]
else:
EXT_MODULES = [
Extension(
"_brotli",
sources=[
"python/_brotli.c",
"c/common/constants.c",
"c/common/context.c",
"c/common/dictionary.c",
"c/common/platform.c",
"c/common/shared_dictionary.c",
"c/common/transform.c",
"c/dec/bit_reader.c",
"c/dec/decode.c",
"c/dec/huffman.c",
"c/dec/prefix.c",
"c/dec/state.c",
"c/dec/static_init.c",
"c/enc/backward_references.c",
"c/enc/backward_references_hq.c",
"c/enc/bit_cost.c",
"c/enc/block_splitter.c",
"c/enc/brotli_bit_stream.c",
"c/enc/cluster.c",
"c/enc/command.c",
"c/enc/compound_dictionary.c",
"c/enc/compress_fragment.c",
"c/enc/compress_fragment_two_pass.c",
"c/enc/dictionary_hash.c",
"c/enc/encode.c",
"c/enc/encoder_dict.c",
"c/enc/entropy_encode.c",
"c/enc/fast_log.c",
"c/enc/histogram.c",
"c/enc/literal_cost.c",
"c/enc/memory.c",
"c/enc/metablock.c",
"c/enc/static_dict.c",
"c/enc/static_dict_lut.c",
"c/enc/static_init.c",
"c/enc/utf8_util.c",
],
depends=[
"c/common/constants.h",
"c/common/context.h",
"c/common/dictionary.h",
"c/common/platform.h",
"c/common/shared_dictionary_internal.h",
"c/common/static_init.h",
"c/common/transform.h",
"c/common/version.h",
"c/dec/bit_reader.h",
"c/dec/huffman.h",
"c/dec/prefix.h",
"c/dec/prefix_inc.h",
"c/dec/state.h",
"c/dec/static_init.h",
"c/enc/backward_references.h",
"c/enc/backward_references_hq.h",
"c/enc/backward_references_inc.h",
"c/enc/bit_cost.h",
"c/enc/bit_cost_inc.h",
"c/enc/block_encoder_inc.h",
"c/enc/block_splitter.h",
"c/enc/block_splitter_inc.h",
"c/enc/brotli_bit_stream.h",
"c/enc/cluster.h",
"c/enc/cluster_inc.h",
"c/enc/command.h",
"c/enc/compound_dictionary.h",
"c/enc/compress_fragment.h",
"c/enc/compress_fragment_two_pass.h",
"c/enc/dictionary_hash.h",
"c/enc/dictionary_hash_inc.h",
"c/enc/encoder_dict.h",
"c/enc/entropy_encode.h",
"c/enc/entropy_encode_static.h",
"c/enc/fast_log.h",
"c/enc/find_match_length.h",
"c/enc/hash.h",
"c/enc/hash_composite_inc.h",
"c/enc/hash_forgetful_chain_inc.h",
"c/enc/hash_longest_match64_inc.h",
"c/enc/hash_longest_match_inc.h",
"c/enc/hash_longest_match_quickly_inc.h",
"c/enc/hash_rolling_inc.h",
"c/enc/hash_to_binary_tree_inc.h",
"c/enc/histogram.h",
"c/enc/histogram_inc.h",
"c/enc/literal_cost.h",
"c/enc/memory.h",
"c/enc/metablock.h",
"c/enc/metablock_inc.h",
"c/enc/params.h",
"c/enc/prefix.h",
"c/enc/quality.h",
"c/enc/ringbuffer.h",
"c/enc/static_dict.h",
"c/enc/static_dict_lut.h",
"c/enc/static_init.h",
"c/enc/utf8_util.h",
"c/enc/write_bits.h",
],
include_dirs=[
"c/include",
]),
]
TEST_SUITE = "setup.get_test_suite"
sources = [
"python/_brotli.c",
"c/common/constants.c",
"c/common/context.c",
"c/common/dictionary.c",
"c/common/platform.c",
"c/common/shared_dictionary.c",
"c/common/transform.c",
"c/dec/bit_reader.c",
"c/dec/decode.c",
"c/dec/huffman.c",
"c/dec/prefix.c",
"c/dec/state.c",
"c/dec/static_init.c",
"c/enc/backward_references.c",
"c/enc/backward_references_hq.c",
"c/enc/bit_cost.c",
"c/enc/block_splitter.c",
"c/enc/brotli_bit_stream.c",
"c/enc/cluster.c",
"c/enc/command.c",
"c/enc/compound_dictionary.c",
"c/enc/compress_fragment.c",
"c/enc/compress_fragment_two_pass.c",
"c/enc/dictionary_hash.c",
"c/enc/encode.c",
"c/enc/encoder_dict.c",
"c/enc/entropy_encode.c",
"c/enc/fast_log.c",
"c/enc/histogram.c",
"c/enc/literal_cost.c",
"c/enc/memory.c",
"c/enc/metablock.c",
"c/enc/static_dict.c",
"c/enc/static_dict_lut.c",
"c/enc/static_init.c",
"c/enc/utf8_util.c",
]
headers = [
"c/common/constants.h",
"c/common/context.h",
"c/common/dictionary.h",
"c/common/platform.h",
"c/common/shared_dictionary_internal.h",
"c/common/static_init.h",
"c/common/transform.h",
"c/common/version.h",
"c/dec/bit_reader.h",
"c/dec/huffman.h",
"c/dec/prefix.h",
"c/dec/prefix_inc.h",
"c/dec/state.h",
"c/dec/static_init.h",
"c/enc/backward_references.h",
"c/enc/backward_references_hq.h",
"c/enc/backward_references_inc.h",
"c/enc/bit_cost.h",
"c/enc/bit_cost_inc.h",
"c/enc/block_encoder_inc.h",
"c/enc/block_splitter.h",
"c/enc/block_splitter_inc.h",
"c/enc/brotli_bit_stream.h",
"c/enc/cluster.h",
"c/enc/cluster_inc.h",
"c/enc/command.h",
"c/enc/compound_dictionary.h",
"c/enc/compress_fragment.h",
"c/enc/compress_fragment_two_pass.h",
"c/enc/dictionary_hash.h",
"c/enc/dictionary_hash_inc.h",
"c/enc/encoder_dict.h",
"c/enc/entropy_encode.h",
"c/enc/entropy_encode_static.h",
"c/enc/fast_log.h",
"c/enc/find_match_length.h",
"c/enc/hash.h",
"c/enc/hash_composite_inc.h",
"c/enc/hash_forgetful_chain_inc.h",
"c/enc/hash_longest_match64_inc.h",
"c/enc/hash_longest_match_inc.h",
"c/enc/hash_longest_match_quickly_inc.h",
"c/enc/hash_rolling_inc.h",
"c/enc/hash_to_binary_tree_inc.h",
"c/enc/histogram.h",
"c/enc/histogram_inc.h",
"c/enc/literal_cost.h",
"c/enc/memory.h",
"c/enc/metablock.h",
"c/enc/metablock_inc.h",
"c/enc/params.h",
"c/enc/prefix.h",
"c/enc/quality.h",
"c/enc/ringbuffer.h",
"c/enc/static_dict.h",
"c/enc/static_dict_lut.h",
"c/enc/static_init.h",
"c/enc/utf8_util.h",
"c/enc/write_bits.h",
]
brotli_extension = setuptools.Extension(
"_brotli",
sources=sources,
depends=headers,
include_dirs=["c/include"],
)
EXT_MODULES = [brotli_extension]
CMD_CLASS = {
"build_ext": BuildExt,
}
with open("README.md", "r") as f:
README = f.read()
README = f.read()
setup(
setuptools.setup(
name=NAME,
description=DESCRIPTION,
long_description=README,
@@ -339,6 +331,5 @@ setup(
package_dir=PACKAGE_DIR,
py_modules=PY_MODULES,
ext_modules=EXT_MODULES,
test_suite=TEST_SUITE,
cmdclass=CMD_CLASS,
)