mirror of
https://github.com/claunia/cuetools.net.git
synced 2025-12-16 18:14:25 +00:00
trying to do rice partitioning on gpu
This commit is contained in:
@@ -1192,11 +1192,12 @@ namespace CUETools.Codecs.FlaCuda
|
||||
|
||||
cuda.SetParameter(cudaCalcPartition, 0, (uint)task.cudaPartitions.Pointer);
|
||||
cuda.SetParameter(cudaCalcPartition, 1 * sizeof(uint), (uint)task.cudaResidual.Pointer);
|
||||
cuda.SetParameter(cudaCalcPartition, 2 * sizeof(uint), (uint)task.cudaBestResidualTasks.Pointer);
|
||||
cuda.SetParameter(cudaCalcPartition, 3 * sizeof(uint), (uint)max_porder);
|
||||
cuda.SetParameter(cudaCalcPartition, 4 * sizeof(uint), (uint)calcPartitionPartSize);
|
||||
cuda.SetParameter(cudaCalcPartition, 5 * sizeof(uint), (uint)calcPartitionPartCount);
|
||||
cuda.SetParameterSize(cudaCalcPartition, 6U * sizeof(uint));
|
||||
cuda.SetParameter(cudaCalcPartition, 2 * sizeof(uint), (uint)task.cudaSamples.Pointer);
|
||||
cuda.SetParameter(cudaCalcPartition, 3 * sizeof(uint), (uint)task.cudaBestResidualTasks.Pointer);
|
||||
cuda.SetParameter(cudaCalcPartition, 4 * sizeof(uint), (uint)max_porder);
|
||||
cuda.SetParameter(cudaCalcPartition, 5 * sizeof(uint), (uint)calcPartitionPartSize);
|
||||
cuda.SetParameter(cudaCalcPartition, 6 * sizeof(uint), (uint)calcPartitionPartCount);
|
||||
cuda.SetParameterSize(cudaCalcPartition, 7U * sizeof(uint));
|
||||
cuda.SetFunctionBlockShape(cudaCalcPartition, 16, 16, 1);
|
||||
|
||||
cuda.SetParameter(task.cudaSumPartition, 0, (uint)task.cudaPartitions.Pointer);
|
||||
@@ -1237,6 +1238,7 @@ namespace CUETools.Codecs.FlaCuda
|
||||
if (!encode_on_cpu)
|
||||
{
|
||||
int bsz = calcPartitionPartCount * calcPartitionPartSize;
|
||||
if (cudaCalcPartition.Pointer != task.cudaCalcPartition.Pointer)
|
||||
cuda.LaunchAsync(task.cudaEncodeResidual, residualPartCount, channels * task.frameCount, task.stream);
|
||||
cuda.LaunchAsync(cudaCalcPartition, (task.frameSize + bsz - 1) / bsz, channels * task.frameCount, task.stream);
|
||||
if (max_porder > 0)
|
||||
|
||||
@@ -824,6 +824,7 @@ extern "C" __global__ void cudaEncodeResidual(
|
||||
extern "C" __global__ void cudaCalcPartition(
|
||||
int* partition_lengths,
|
||||
int* residual,
|
||||
int* samples,
|
||||
encodeResidualTaskStruct *tasks,
|
||||
int max_porder, // <= 8
|
||||
int psize, // == (shared.task.blocksize >> max_porder), < 256
|
||||
@@ -831,8 +832,7 @@ extern "C" __global__ void cudaCalcPartition(
|
||||
)
|
||||
{
|
||||
__shared__ struct {
|
||||
int data[256];
|
||||
int length[256];
|
||||
int data[256+32];
|
||||
encodeResidualTaskStruct task;
|
||||
} shared;
|
||||
const int tid = threadIdx.x + (threadIdx.y << 4);
|
||||
@@ -841,33 +841,50 @@ extern "C" __global__ void cudaCalcPartition(
|
||||
__syncthreads();
|
||||
|
||||
const int parts = min(parts_per_block, (1 << max_porder) - blockIdx.x * parts_per_block);
|
||||
const int offs = blockIdx.x * psize * parts_per_block + tid;
|
||||
|
||||
// fetch residual
|
||||
int offs = blockIdx.x * psize * parts_per_block + tid;
|
||||
int s = (offs >= shared.task.residualOrder && tid < parts * psize) ? residual[shared.task.residualOffs + offs] : 0;
|
||||
// fetch samples
|
||||
if (tid < 32) shared.data[tid] = min(offs, tid + shared.task.residualOrder) >= 32 ? samples[shared.task.samplesOffs + offs - 32] >> shared.task.wbits : 0;
|
||||
shared.data[32 + tid] = tid < parts * psize ? samples[shared.task.samplesOffs + offs] >> shared.task.wbits : 0;
|
||||
__syncthreads();
|
||||
|
||||
// compute residual
|
||||
int s = 0;
|
||||
for (int c = -shared.task.residualOrder; c < 0; c++)
|
||||
s += __mul24(shared.data[32 + tid + c], shared.task.coefs[shared.task.residualOrder + c]);
|
||||
s = shared.data[32 + tid] - (s >> shared.task.shift);
|
||||
|
||||
if (offs >= shared.task.residualOrder && tid < parts * psize)
|
||||
residual[shared.task.residualOffs + offs] = s;
|
||||
else
|
||||
s = 0;
|
||||
|
||||
__syncthreads();
|
||||
// convert to unsigned
|
||||
shared.data[tid] = min(0xfffff, (s << 1) ^ (s >> 31));
|
||||
__syncthreads();
|
||||
|
||||
int sum = 0;
|
||||
s = (psize - shared.task.residualOrder * (threadIdx.y + blockIdx.x == 0)) * (threadIdx.x + 1);
|
||||
int dpos = threadIdx.y * psize;
|
||||
// calc number of unary bits for each residual part with each rice parameter
|
||||
#pragma unroll 0
|
||||
for (int i = 0; i < psize; i++)
|
||||
// for part (threadIdx.y) with this rice parameter (threadIdx.x)
|
||||
sum += shared.data[dpos + i] >> threadIdx.x;
|
||||
shared.length[tid] = sum + (psize - shared.task.residualOrder * (threadIdx.y + blockIdx.x == 0)) * (threadIdx.x + 1);
|
||||
s += shared.data[dpos + i] >> threadIdx.x;
|
||||
__syncthreads();
|
||||
shared.data[tid] = s;
|
||||
__syncthreads();
|
||||
|
||||
// output length (transposed: k is now threadIdx.y)
|
||||
const int pos = (15 << (max_porder + 1)) * blockIdx.y + (threadIdx.y << (max_porder + 1));
|
||||
if (threadIdx.y <= 14 && threadIdx.x < parts)
|
||||
partition_lengths[pos + blockIdx.x * parts_per_block + threadIdx.x] = shared.length[threadIdx.y + (threadIdx.x << 4)];
|
||||
partition_lengths[pos + blockIdx.x * parts_per_block + threadIdx.x] = shared.data[threadIdx.y + (threadIdx.x << 4)];
|
||||
}
|
||||
|
||||
extern "C" __global__ void cudaCalcPartition1(
|
||||
int* partition_lengths,
|
||||
int* residual,
|
||||
int* samples,
|
||||
encodeResidualTaskStruct *tasks,
|
||||
int max_porder, // <= 8
|
||||
int psize, // == (shared.task.blocksize >> max_porder), < 256
|
||||
@@ -916,6 +933,7 @@ extern "C" __global__ void cudaCalcPartition1(
|
||||
extern "C" __global__ void cudaCalcLargePartition(
|
||||
int* partition_lengths,
|
||||
int* residual,
|
||||
int* samples,
|
||||
encodeResidualTaskStruct *tasks,
|
||||
int max_porder, // <= 8
|
||||
int psize, // == >= 128
|
||||
@@ -1016,16 +1034,18 @@ extern "C" __global__ void cudaFindRiceParameter(
|
||||
shared.length[tid] = l1 = min(l1, l2);
|
||||
#pragma unroll 2
|
||||
for (int sh = 2; sh > 0; sh --)
|
||||
if (threadIdx.x < (1 << sh))
|
||||
{
|
||||
l2 = shared.length[tid + (1 << sh)];
|
||||
shared.index[tid] = shared.index[tid + ((l2 < l1) << sh)];
|
||||
shared.length[tid] = l1 = min(l1, l2);
|
||||
}
|
||||
if (threadIdx.x == 0 && threadIdx.y < parts)
|
||||
{
|
||||
l2 = shared.length[tid + 1];
|
||||
if (threadIdx.x == 0 && threadIdx.y < parts)
|
||||
shared.outidx[threadIdx.y] = shared.index[tid + (l2 < l1)];
|
||||
if (threadIdx.x == 0 && threadIdx.y < parts)
|
||||
shared.outlen[threadIdx.y] = min(l1, l2);
|
||||
}
|
||||
__syncthreads();
|
||||
// output rice parameter
|
||||
if (tid < parts)
|
||||
|
||||
@@ -711,7 +711,7 @@ code {
|
||||
code {
|
||||
name = cudaCalcPartition1
|
||||
lmem = 0
|
||||
smem = 3304
|
||||
smem = 3308
|
||||
reg = 11
|
||||
bar = 1
|
||||
const {
|
||||
@@ -731,67 +731,67 @@ code {
|
||||
0x308111fd 0x644107c8 0xa0012003 0x00000000
|
||||
0x30021019 0xc4100780 0x10012003 0x00000280
|
||||
0xa0004e05 0x04200780 0x30070209 0xc4100780
|
||||
0x30060205 0xc4100780 0x20018404 0x2101ec04
|
||||
0x30060205 0xc4100780 0x20018404 0x2101ee04
|
||||
0x20000c05 0x04004780 0xd00e0205 0x80c00780
|
||||
0x00000c05 0xc0000780 0x04061401 0xe4204780
|
||||
0x00000c05 0xc0000780 0x04061601 0xe4204780
|
||||
0xf0000001 0xe0000002 0x861ffe03 0x00000000
|
||||
0xa0004c0d 0x04200780 0x1100f204 0x1100f008
|
||||
0xa0004c0d 0x04200780 0x1100f404 0x1100f208
|
||||
0x4006061c 0x40050c24 0x10018029 0x00000003
|
||||
0x1000ce11 0x0423c780 0x30100e1d 0xc4100780
|
||||
0x1000d011 0x0423c780 0x30100e1d 0xc4100780
|
||||
0x30101225 0xc4100780 0x30041411 0xc4000780
|
||||
0x6006041d 0x0001c780 0x60040c0d 0x00024780
|
||||
0x30048e10 0x1100f008 0x40060625 0x00000780
|
||||
0x3004d211 0xa4200780 0x60070425 0x00024780
|
||||
0x30048e10 0x1100f208 0x40060625 0x00000780
|
||||
0x3004d411 0xa4200780 0x60070425 0x00024780
|
||||
0x40051029 0x00000780 0x30101225 0xc4100780
|
||||
0x60041229 0x00028780 0x60060405 0x00024780
|
||||
0xd0185005 0x20000780 0x3010140d 0xc4100780
|
||||
0xd0185805 0x20000780 0x3010140d 0xc4100780
|
||||
0x20000205 0x04020780 0x60041009 0x0000c780
|
||||
0x3401c1fd 0x6c20c7c8 0x300211fd 0x6c0042c8
|
||||
0xa0035003 0x00000000 0x10034003 0x00000100
|
||||
0xd018a005 0x20000780 0x2400c005 0x04204780
|
||||
0xd018a805 0x20000780 0x2400c005 0x04204780
|
||||
0x30020205 0xc4100780 0x2000ca05 0x04204780
|
||||
0xd00e0205 0x80c00780 0x10035003 0x00000780
|
||||
0x1000f805 0x0403c780 0x301f0209 0xec100782
|
||||
0x30010205 0xc4100780 0xd0010405 0x04008780
|
||||
0x00000c05 0xc0000780 0x30820205 0xac400780
|
||||
0x04001401 0xe4204780 0x861ffe03 0x00000000
|
||||
0x04001601 0xe4204780 0x861ffe03 0x00000000
|
||||
0x307c0a05 0x64008780 0x30000809 0x64010780
|
||||
0xd0830205 0x04400780 0xd0830409 0x04400780
|
||||
0xd002020d 0x04000780 0x307cd1fd 0x6c2107c8
|
||||
0xd002020d 0x04000780 0x307cd3fd 0x6c2107c8
|
||||
0x1000f821 0x0403c780 0x00000c05 0xc0000780
|
||||
0x1000f809 0x0403c780 0x04021401 0xe43f0780
|
||||
0x1005b003 0x00000100 0x1000d005 0x0423c780
|
||||
0x1000f809 0x0403c780 0x04021601 0xe43f0780
|
||||
0x1005b003 0x00000100 0x1000d205 0x0423c780
|
||||
0x40010425 0x00000780 0x60000625 0x00024780
|
||||
0x30101225 0xc4100780 0x60000429 0x00024780
|
||||
0x20001405 0x04014780 0x200a8225 0x00000003
|
||||
0x2000d029 0x04228780 0x00021205 0xc0000780
|
||||
0x20001405 0x04014780 0x200b8225 0x00000003
|
||||
0x2000d229 0x04228780 0x00021205 0xc0000780
|
||||
0xa005a003 0x00000000 0x20000a25 0x04028780
|
||||
0x3408c029 0xec200780 0x20108205 0x00000003
|
||||
0x20000409 0x04028780 0x00000c09 0xc0000780
|
||||
0x300903fd 0x6c0047d8 0xd4008005 0x20000780
|
||||
0x08021401 0xe4208780 0x10052003 0x00001280
|
||||
0x08021601 0xe4208780 0x10052003 0x00001280
|
||||
0xf0000001 0xe0000002 0x00000c05 0xc0000780
|
||||
0xd4085809 0x20000780 0x2800ce05 0x04208780
|
||||
0x04021401 0xe4204780 0x2800c605 0x04204780
|
||||
0x04021401 0xe4204780 0x2800c205 0x04204780
|
||||
0x04021401 0xe4204780 0x2800c005 0x04204780
|
||||
0x307c07fd 0x6c0087d8 0x04021401 0xe4204780
|
||||
0xd4086009 0x20000780 0x2800ce05 0x04208780
|
||||
0x04021601 0xe4204780 0x2800c605 0x04204780
|
||||
0x04021601 0xe4204780 0x2800c205 0x04204780
|
||||
0x04021601 0xe4204780 0x2800c005 0x04204780
|
||||
0x307c07fd 0x6c0087d8 0x04021601 0xe4204780
|
||||
0x30041009 0xc4101500 0x20000009 0x04009500
|
||||
0x00020405 0xc0001500 0x04041401 0xe4205500
|
||||
0x00020405 0xc0001500 0x04041601 0xe4205500
|
||||
0x20019021 0x00000003 0x308411fd 0x6c4147d8
|
||||
0x10043003 0x00001280 0x861ffe03 0x00000000
|
||||
0x300509fd 0x640107c8 0x308501fd 0x6440c2c8
|
||||
0x30000003 0x00000100 0xd0185005 0x20000780
|
||||
0x1000d00d 0x0423c780 0x20018009 0x00000003
|
||||
0x1000d005 0x0423c780 0x3503e00c 0x40030810
|
||||
0x30000003 0x00000100 0xd0185805 0x20000780
|
||||
0x1000d20d 0x0423c780 0x20018009 0x00000003
|
||||
0x1000d205 0x0423c780 0x3503e00c 0x40030810
|
||||
0x610f2e01 0x00000003 0x60020a29 0x00010780
|
||||
0x40070825 0x00000780 0x2101ee21 0x00000003
|
||||
0x40070825 0x00000780 0x2101f021 0x00000003
|
||||
0x20000e11 0x040147c0 0x30101429 0xc4100780
|
||||
0x60060a1d 0x00024780 0x30080015 0xc4000780
|
||||
0x60020801 0x00028780 0x30100e1d 0xc4100780
|
||||
0x20000805 0x04014780 0x00000c05 0xc0000780
|
||||
0x60060801 0x0001c100 0x30020205 0xc4100780
|
||||
0xd4105005 0x20000780 0x2101e804 0x2500e000
|
||||
0xd4105805 0x20000780 0x2101e804 0x2500e000
|
||||
0xd00e0201 0xa0c00781
|
||||
}
|
||||
}
|
||||
@@ -1107,45 +1107,45 @@ code {
|
||||
0xd808380d 0x20000780 0x1900ee18 0x1d00e004
|
||||
0x861ffe03 0x00000000 0x300603fd 0x6c0107c8
|
||||
0x20088a0d 0x00000003 0x10000a0d 0x0403c500
|
||||
0x30060219 0xac000780 0x2004801d 0x00000003
|
||||
0x04020e01 0xe420c780 0x00020e09 0xc0000780
|
||||
0x04000e01 0xe4218780 0x10000005 0x0403c780
|
||||
0x3806cffd 0x6c2047c8 0x10000e05 0x0403c280
|
||||
0x0002020d 0xc0000780 0xdc08380d 0x20000780
|
||||
0x3806ce0d 0xac200780 0x1c00c005 0x0423c780
|
||||
0x20028019 0x00000003 0x04020e01 0xe4204780
|
||||
0x00020c09 0xc0000780 0x04000e01 0xe420c780
|
||||
0x10000005 0x0403c780 0x3803cffd 0x6c2047c8
|
||||
0x10000c05 0x0403c280 0x0002020d 0xc0000780
|
||||
0xdc08380d 0x20000780 0x3803ce19 0xac200780
|
||||
0x1c00c005 0x0423c780 0x307c0a0d 0x64008780
|
||||
0x30040415 0x64010780 0x04020e01 0xe4204780
|
||||
0xd0830605 0x04400780 0xd0830a0d 0x04400780
|
||||
0x04000e01 0xe4218780 0xd0030215 0x040007c0
|
||||
0xa006c003 0x00000000 0x1400d005 0x0423c780
|
||||
0x1006c003 0x00000100 0x3406d005 0x6c204780
|
||||
0x30060219 0xac000780 0x04020e01 0xe420c780
|
||||
0x30020bfd 0xe41007c8 0xa0055003 0x00000000
|
||||
0x04000e01 0xe4218780 0x10055003 0x00000280
|
||||
0x2004800d 0x00000003 0x00020609 0xc0000780
|
||||
0x10000005 0x0403c780 0x3806cffd 0x6c2047c8
|
||||
0x10000605 0x0403c280 0x0002020d 0xc0000780
|
||||
0xdc08380d 0x20000780 0x3806ce19 0xac200780
|
||||
0x1c00c005 0x0423c780 0x04020e01 0xe4204780
|
||||
0x04000e01 0xe4218780 0x30010bfd 0xe41007ca
|
||||
0xa0063003 0x00000000 0x10063003 0x00000280
|
||||
0x2002800d 0x00000003 0x00020609 0xc0000780
|
||||
0x10000005 0x0403c780 0x3806cffd 0x6c2047c8
|
||||
0x10000605 0x0403c280 0x0002020d 0xc0000780
|
||||
0xdc08380d 0x20000780 0x3806ce19 0xac200780
|
||||
0x1c00c005 0x0423c780 0x04020e01 0xe4204780
|
||||
0x04000e01 0xe4218780 0x307c0bfd 0x640087ca
|
||||
0x300405fd 0x640102c8 0xa0070003 0x00000000
|
||||
0x10070003 0x00000100 0x3406d005 0x6c204780
|
||||
0x30000205 0x04000780 0x00020209 0xc0000780
|
||||
0xd8083809 0x20000780 0x0002080d 0xc0000780
|
||||
0x1500f004 0x1900e00c 0x0c044e01 0xe420c780
|
||||
0x307c0bfd 0x6c0087ca 0x30010c05 0xac000500
|
||||
0x00020809 0xc0000500 0x08040e01 0xe4204500
|
||||
0x861ffe03 0x00000000 0x30000405 0x6c0107d0
|
||||
0xa00003fd 0x0c0147c8 0xa0080003 0x00000000
|
||||
0x10080003 0x00001100 0x40054c11 0x00200780
|
||||
0xa0004e05 0x04200780 0x2102ec0d 0x00000003
|
||||
0x30100811 0xc4100780 0x30030205 0xc4000780
|
||||
0x60044c0d 0x00210780 0x20018604 0x20018004
|
||||
0xd4113809 0x20000780 0x3002020d 0xc4100780
|
||||
0x1900e004 0x2103e80c 0xd00e0605 0xa0c00780
|
||||
0xf0000001 0xe0000002 0x30000003 0x00000100
|
||||
0x2101ec0d 0x00000003 0x40054c19 0x00200780
|
||||
0x10018005 0x00000003 0xa0004e11 0x04200780
|
||||
0x2102ec15 0x00000003 0x30100c19 0xc4100780
|
||||
0x30030205 0xc4000780 0x3005080d 0xc4000780
|
||||
0x60044c09 0x00218780 0x20038204 0x20008400
|
||||
0x20000001 0x04004780 0xd4103805 0x20000780
|
||||
0x30020005 0xc4100780 0x1500e000 0x2101e804
|
||||
0xd00e0201 0xa0c00781
|
||||
0xd808380d 0x20000780 0x00020809 0xc0000780
|
||||
0x3406d00d 0xac200780 0x1c00c005 0x0423c780
|
||||
0x08044e01 0xe4204780 0x08040e01 0xe420c780
|
||||
0xf0000001 0xe0000002 0x861ffe03 0x00000000
|
||||
0x30000405 0x6c0107d0 0xa00003fd 0x0c0147c8
|
||||
0xa0081003 0x00000000 0x10081003 0x00001100
|
||||
0x40054c11 0x00200780 0xa0004e05 0x04200780
|
||||
0x2102ec0d 0x00000003 0x30100811 0xc4100780
|
||||
0x30030205 0xc4000780 0x60044c0d 0x00210780
|
||||
0x20018604 0x20018004 0xd4113809 0x20000780
|
||||
0x3002020d 0xc4100780 0x1900e004 0x2103e80c
|
||||
0xd00e0605 0xa0c00780 0xf0000001 0xe0000002
|
||||
0x30000003 0x00000100 0x2101ec0d 0x00000003
|
||||
0x40054c19 0x00200780 0x10018005 0x00000003
|
||||
0xa0004e11 0x04200780 0x2102ec15 0x00000003
|
||||
0x30100c19 0xc4100780 0x30030205 0xc4000780
|
||||
0x3005080d 0xc4000780 0x60044c09 0x00218780
|
||||
0x20038204 0x20008400 0x20000001 0x04004780
|
||||
0xd4103805 0x20000780 0x30020005 0xc4100780
|
||||
0x1500e000 0x2101e804 0xd00e0201 0xa0c00781
|
||||
}
|
||||
}
|
||||
code {
|
||||
@@ -1417,76 +1417,110 @@ code {
|
||||
code {
|
||||
name = cudaCalcPartition
|
||||
lmem = 0
|
||||
smem = 2280
|
||||
reg = 12
|
||||
smem = 1388
|
||||
reg = 11
|
||||
bar = 1
|
||||
const {
|
||||
segname = const
|
||||
segnum = 1
|
||||
offset = 0
|
||||
bytes = 16
|
||||
bytes = 24
|
||||
mem {
|
||||
0x000003ff 0x0000002f 0x000fffff 0x0000000e
|
||||
0x000003ff 0x0000002f 0x0000001f 0x00000001
|
||||
0x000fffff 0x0000000e
|
||||
}
|
||||
}
|
||||
bincode {
|
||||
0xd0800205 0x00400780 0xa0000211 0x04000780
|
||||
0xa0000019 0x04000780 0x30040801 0xc4100780
|
||||
0x20000c25 0x04000780 0x308113fd 0x644107c8
|
||||
0xa0011003 0x00000000 0x3002121d 0xc4100780
|
||||
0xd0800205 0x00400780 0xa0000209 0x04000780
|
||||
0xa0000019 0x04000780 0x30040401 0xc4100780
|
||||
0x20000c11 0x04000780 0x308109fd 0x644107c8
|
||||
0xa0011003 0x00000000 0x3002081d 0xc4100780
|
||||
0x10011003 0x00000280 0xa0004e01 0x04200780
|
||||
0x30070005 0xc4100780 0x30060001 0xc4100780
|
||||
0x20008200 0x2100ec00 0x20000e01 0x04000780
|
||||
0x20008200 0x2100ee00 0x20000e01 0x04000780
|
||||
0xd00e0001 0x80c00780 0x00000e05 0xc0000780
|
||||
0x04041401 0xe4200780 0xf0000001 0xe0000002
|
||||
0x861ffe03 0x00000000 0xa0004c09 0x04200780
|
||||
0x1100f004 0x1100f200 0x40040214 0x4003082c
|
||||
0x10018029 0x00000003 0x1000ce0d 0x0423c780
|
||||
0x30100a21 0xc4100780 0x30101615 0xc4100780
|
||||
0x3003140d 0xc4000780 0x60040021 0x00020780
|
||||
0x60020815 0x00014780 0x3003900c 0x1100f004
|
||||
0x400a0229 0x00000780 0x3003d20d 0xa4200780
|
||||
0x600b0029 0x00028780 0x40030c2d 0x00000780
|
||||
0x30101429 0xc4100780 0x60020e2d 0x0002c780
|
||||
0x600a0001 0x00028780 0xd0105005 0x20000780
|
||||
0x30101615 0xc4100780 0x20000001 0x04024780
|
||||
0x60020c05 0x00014780 0x3400c1fd 0x6c20c7c8
|
||||
0x300113fd 0x6c0042c8 0xd010a005 0x20000780
|
||||
0x2400c001 0x04200680 0x30020001 0xc4100680
|
||||
0x2000ca01 0x04200680 0xd00e0001 0x80c00680
|
||||
0x1000f801 0x0403c100 0x301f0005 0xec100780
|
||||
0x30010001 0xc4100780 0xd0000201 0x04008780
|
||||
0x00000e05 0xc0000780 0x30820001 0xac400780
|
||||
0x04001401 0xe4200780 0x861ffe03 0x00000000
|
||||
0x307cd1fd 0x6c20c7c8 0x1000f815 0x0403c780
|
||||
0x1004a003 0x00000280 0x1000d001 0x0423c780
|
||||
0x40090005 0x00000780 0x60080205 0x00004780
|
||||
0x30100205 0xc4100780 0x60080001 0x00004780
|
||||
0x200a8005 0x00000003 0x00020205 0xc0000780
|
||||
0xa0049003 0x00000000 0x2000d005 0x04200780
|
||||
0x20018001 0x00000003 0x3606c225 0xec200780
|
||||
0x300101fd 0x6c0147c8 0x20000a15 0x04024780
|
||||
0x10044003 0x00000280 0xf0000001 0xe0000002
|
||||
0x200005fd 0x040107c8 0xa005d003 0x00000000
|
||||
0x20018c05 0x00000003 0x10057003 0x00000280
|
||||
0xd0105005 0x20000780 0x1400c001 0x0423c780
|
||||
0x2040d001 0x04200780 0x40010409 0x00000780
|
||||
0x60000609 0x00008780 0x30100409 0xc4100780
|
||||
0x60000401 0x00008780 0x20000001 0x04014780
|
||||
0x1005d003 0x00000780 0x1000d001 0x0423c780
|
||||
0x40010409 0x00000780 0x60000609 0x00008780
|
||||
0x30100409 0xc4100780 0x60000401 0x00008780
|
||||
0x20000001 0x04014780 0x00000e05 0xc0000782
|
||||
0x04021401 0xe4200780 0x861ffe03 0x00000000
|
||||
0x300607fd 0x640107c8 0x308309fd 0x6440c2c8
|
||||
0x30000003 0x00000100 0x2101ee05 0x00000003
|
||||
0x100f8001 0x00000003 0x30010001 0xc4000780
|
||||
0x40014e09 0x00200780 0x30100409 0xc4100780
|
||||
0x60004e09 0x00208780 0x30040c0d 0xc4100780
|
||||
0x30010805 0xc4000780 0x20069000 0x2003880c
|
||||
0x20000405 0x04004780 0x00020605 0xc0000780
|
||||
0x20000001 0x04004780 0xd4085005 0x20000780
|
||||
0x30020005 0xc4100780 0x1500e000 0x2101e804
|
||||
0x04025601 0xe4200780 0xf0000001 0xe0000002
|
||||
0x861ffe03 0x00000000 0xa0004c05 0x04200780
|
||||
0x1000d201 0x0423c780 0x4002020d 0x00000780
|
||||
0x3010060d 0xc4100780 0x6002000d 0x0000c780
|
||||
0x1000d401 0x0423c780 0x40010c15 0x00000780
|
||||
0x60000e15 0x00014780 0x30100a15 0xc4100780
|
||||
0x60000c01 0x00014780 0x308209fd 0x6c4107c8
|
||||
0xa0033003 0x00000000 0x20000015 0x04010780
|
||||
0x10033003 0x00000280 0xd0095805 0x20000780
|
||||
0x2400c001 0x04210780 0x30000a01 0xac000780
|
||||
0x308201fd 0x6c40c7c8 0xa0031003 0x00000000
|
||||
0x10030003 0x00000280 0xd0096005 0x20000780
|
||||
0x2400c001 0x04214780 0x30020001 0xc4100780
|
||||
0x2000cc01 0x04200780 0x20008001 0x0ffffffb
|
||||
0xd00e000d 0x80c00780 0x1400d401 0x0423c780
|
||||
0x30000601 0xec000780 0x10031003 0x00000780
|
||||
0x1000f801 0x0403c780 0x00000e05 0xc0000782
|
||||
0x04001601 0xe4200780 0x1000d401 0x0423c782
|
||||
0x4003000d 0x00000780 0x60020225 0x0000c780
|
||||
0x10018021 0x00000003 0x1000d00d 0x0423c780
|
||||
0x30101225 0xc4100780 0x3003100d 0xc4000780
|
||||
0x60020021 0x00024780 0x3003900c 0x1100f200
|
||||
0x3003d40d 0xa4200780 0x40010c25 0x00000780
|
||||
0x60000e25 0x00024780 0x30101225 0xc4100780
|
||||
0x60000c01 0x00024780 0x30040001 0x6c0107d0
|
||||
0xa00001fd 0x0c0147c8 0xa004e003 0x00000000
|
||||
0x1004d003 0x00001100 0xd0096005 0x20000780
|
||||
0x2400c001 0x04214780 0x30020001 0xc4100780
|
||||
0x2000cc01 0x04200780 0xd00e0025 0x80c00780
|
||||
0x1400d401 0x0423c780 0x30001201 0xec000780
|
||||
0x1004e003 0x00000780 0x1000f801 0x0403c780
|
||||
0x00000e05 0xc0000782 0x04005601 0xe4200780
|
||||
0x861ffe03 0x00000000 0xd0095805 0x20000780
|
||||
0x3500e029 0x00000003 0x307c15fd 0x6c0187d8
|
||||
0x1000f825 0x0403c780 0x1400c001 0x0423c780
|
||||
0x10068003 0x00001280 0x20001401 0x04010780
|
||||
0x200b8001 0x00000003 0x102c8011 0x00000003
|
||||
0x00020009 0xc0000780 0x00000805 0xc0000780
|
||||
0x1000f811 0x0403c780 0xd4098011 0x20000780
|
||||
0xd801000d 0x20000780 0x20018811 0x00000003
|
||||
0x1000c001 0x0423c784 0xd0095811 0x20000780
|
||||
0x6c00c025 0x80224780 0x3004c1fd 0x6c2147dc
|
||||
0xd8000809 0x20000780 0xd4000805 0x20000780
|
||||
0x1000c001 0x0423c784 0x1005d003 0x00001280
|
||||
0xd0096809 0x20000780 0x30050029 0x6c00c780
|
||||
0x1000f811 0x0403c780 0x00000e05 0xc0000780
|
||||
0x1800c001 0x0423c780 0xa0001429 0x2c014780
|
||||
0x10000611 0x2440c280 0x30001201 0xec000780
|
||||
0xd4015805 0x20000780 0xd00a09fd 0x040007c8
|
||||
0x2440c025 0x04200780 0xd009a805 0x20000780
|
||||
0x2400c001 0x04214680 0x30020001 0xc4100680
|
||||
0x2000ca01 0x04200680 0xd00e0025 0xa0c00680
|
||||
0x1000f825 0x0403c100 0x861ffe03 0x00000000
|
||||
0x301f1211 0xec100780 0x30011201 0xc4100780
|
||||
0xd0000801 0x04008780 0x00000e05 0xc0000780
|
||||
0x30840001 0xac400780 0x04001601 0xe4200780
|
||||
0x861ffe03 0x00000000 0xd0095805 0x20000780
|
||||
0x1000d215 0x0423c780 0x20018c11 0x00000003
|
||||
0x1000d201 0x0423c780 0x3505e014 0x40011024
|
||||
0x60001225 0x00024780 0x400b1029 0x00000780
|
||||
0x30101225 0xc4100780 0x600a1229 0x00028780
|
||||
0x200003fd 0x040087c8 0x60001025 0x00024780
|
||||
0x30101401 0xc4100780 0x307cd3fd 0x6c20c7d8
|
||||
0x600a1025 0x00000100 0x1009f003 0x00001280
|
||||
0x1000d201 0x0423c780 0x40050005 0x00000780
|
||||
0x60040205 0x00004780 0x30100205 0xc4100780
|
||||
0x60040011 0x00004780 0x200b8801 0x00000003
|
||||
0x00020005 0xc0000780 0xa009e003 0x00000000
|
||||
0x2000d205 0x04210780 0x20018811 0x00000003
|
||||
0x3606c201 0xec200780 0x300109fd 0x6c0147c8
|
||||
0x20001225 0x04000780 0x10099003 0x00000280
|
||||
0xf0000001 0xe0000002 0x861ffe03 0x00000000
|
||||
0x00000e05 0xc0000780 0x04001601 0xe4224780
|
||||
0x861ffe03 0x00000000 0x300607fd 0x640107c8
|
||||
0x308505fd 0x6440c2c8 0x30000003 0x00000100
|
||||
0x2101f00d 0x00000003 0x100f8001 0x00000003
|
||||
0x30030005 0xc4000780 0x40034e01 0x00200780
|
||||
0x30100001 0xc4100780 0x60024e05 0x00200780
|
||||
0x3003040d 0xc4000780 0x20001011 0x04018780
|
||||
0x30040c01 0xc4100780 0x20038204 0x20008400
|
||||
0x20000805 0x04004780 0x00020005 0xc0000780
|
||||
0x30020205 0xc4100780 0x1500f600 0x2101e804
|
||||
0xd00e0201 0xa0c00781
|
||||
}
|
||||
}
|
||||
@@ -1515,7 +1549,7 @@ code {
|
||||
code {
|
||||
name = cudaCalcLargePartition
|
||||
lmem = 0
|
||||
smem = 2280
|
||||
smem = 2284
|
||||
reg = 10
|
||||
bar = 1
|
||||
const {
|
||||
@@ -1535,28 +1569,28 @@ code {
|
||||
0xa0011003 0x00000000 0x30020411 0xc4100780
|
||||
0x10011003 0x00000280 0xa0004e05 0x04200780
|
||||
0x30070215 0xc4100780 0x30060205 0xc4100780
|
||||
0x20018a04 0x2101ec04 0x20000805 0x04004780
|
||||
0x20018a04 0x2101ee04 0x20000805 0x04004780
|
||||
0xd00e0205 0x80c00780 0x00000805 0xc0000780
|
||||
0x04041401 0xe4204780 0xf0000001 0xe0000002
|
||||
0x861ffe03 0x00000000 0x307cd1fd 0x6c20c7c8
|
||||
0x04041601 0xe4204780 0xf0000001 0xe0000002
|
||||
0x861ffe03 0x00000000 0x307cd3fd 0x6c20c7c8
|
||||
0x1000f819 0x0403c780 0x10046003 0x00000280
|
||||
0xa0004c15 0x04200780 0x1000d005 0x0423c780
|
||||
0xa0004c15 0x04200780 0x1000d205 0x0423c780
|
||||
0x400a061d 0x00000780 0x30100e1d 0xc4100780
|
||||
0x600a0405 0x0001c780 0x3082d015 0xac600780
|
||||
0x2000d021 0x04204780 0xa0045003 0x00000000
|
||||
0x600a0405 0x0001c780 0x3082d215 0xac600780
|
||||
0x2000d221 0x04204780 0xa0045003 0x00000000
|
||||
0x30000bfd 0x6c0107c8 0x2001841c 0x20088420
|
||||
0x10000405 0x0403c780 0xd0105005 0x20000780
|
||||
0x3407c1fd 0x6c20c7d8 0x3001d1fd 0x6c2112d8
|
||||
0x10000405 0x0403c780 0xd0105805 0x20000780
|
||||
0x3407c1fd 0x6c20c7d8 0x3001d3fd 0x6c2112d8
|
||||
0xa002d003 0x00000000 0x1002c003 0x00001100
|
||||
0xd010a005 0x20000780 0x2400c009 0x0421c780
|
||||
0xd010a805 0x20000780 0x2400c009 0x0421c780
|
||||
0x30020409 0xc4100780 0x2000ca09 0x04208780
|
||||
0xd00e0425 0x80c00780 0x1002d003 0x00000780
|
||||
0x1000f825 0x0403c780 0x301f1209 0xec100782
|
||||
0x30011225 0xc4100780 0xd0090409 0x04008780
|
||||
0x00000805 0xc0000780 0x30830409 0xac400780
|
||||
0x04001401 0xe4208780 0x861ffe03 0x00000000
|
||||
0x04001601 0xe4208780 0x861ffe03 0x00000000
|
||||
0xa003f003 0x00000000 0x10000009 0x0403c780
|
||||
0x1003f003 0x00000100 0x200a8025 0x00000003
|
||||
0x1003f003 0x00000100 0x200b8025 0x00000003
|
||||
0x00021205 0xc0000780 0x20108409 0x00000003
|
||||
0x3403c025 0xec200780 0x30020bfd 0x6c0107d8
|
||||
0x20000c19 0x04024780 0xd4008005 0x20000780
|
||||
@@ -1565,24 +1599,24 @@ code {
|
||||
0x30080ffd 0x6c0047d8 0x20008205 0x00000013
|
||||
0x10021003 0x00001280 0xf0000001 0xe0000002
|
||||
0x30830c05 0xac400780 0x00000805 0xc0000780
|
||||
0x04021401 0xe4204780 0xd4085009 0x20000780
|
||||
0x1900f004 0x2901e004 0x04021401 0xe4204780
|
||||
0x1900e804 0x2901e004 0x04021401 0xe4204780
|
||||
0x1900e404 0x2901e004 0x04021401 0xe4204780
|
||||
0x04021601 0xe4204780 0xd4085809 0x20000780
|
||||
0x1900f004 0x2901e004 0x04021601 0xe4204780
|
||||
0x1900e804 0x2901e004 0x04021601 0xe4204780
|
||||
0x1900e404 0x2901e004 0x04021601 0xe4204780
|
||||
0x1800c205 0x0423c780 0x307c01fd 0x640087c8
|
||||
0x2800c001 0x04204780 0x308407fd 0x6440c2c8
|
||||
0x04021401 0xe4200780 0x30000003 0x00000100
|
||||
0xd0105005 0x20000780 0x2101ee19 0x00000003
|
||||
0x04021601 0xe4200780 0x30000003 0x00000100
|
||||
0xd0105805 0x20000780 0x2101f019 0x00000003
|
||||
0x100f8005 0x00000003 0x20018609 0x00000003
|
||||
0x1000d001 0x0423c780 0x30060205 0xc4000780
|
||||
0x1100f014 0x41032e1c 0x40000a24 0x3505e014
|
||||
0x1000d201 0x0423c780 0x30060205 0xc4000780
|
||||
0x1100f214 0x41032e1c 0x40000a24 0x3505e014
|
||||
0x30100e21 0xc4100780 0x6001081d 0x00024780
|
||||
0x3006060d 0xc4000780 0x400b0819 0x00000780
|
||||
0x60024e05 0x00220780 0x00000805 0xc0000780
|
||||
0x30100e1d 0xc4100780 0x600a0a11 0x00018780
|
||||
0x2000020d 0x0400c780 0xa0004c05 0x042007c0
|
||||
0x60000801 0x0001c780 0x30100811 0xc4100780
|
||||
0xd4085005 0x20000780 0x20000205 0x0400c780
|
||||
0xd4085805 0x20000780 0x20000205 0x0400c780
|
||||
0x600a0801 0x00010100 0x3483c009 0xac600780
|
||||
0x30020205 0xc4100780 0x20028000 0x2101e804
|
||||
0xd00e0201 0xa0c00781
|
||||
|
||||
Reference in New Issue
Block a user