I'm still somewhat new to C, so pardon me for any silly mistakes I've made.
I'm trying to convert video frames into Minecraft map colors using JNI
and FFmpeg
's av
(and swscale
) libraries. The code works below but it's sluggish and I'm going to assume it's prone to the branch prediction problem.
The Minecraft map data is stored in ctx->data
; it's a 16384 byte array that's fetched from a direct ByteBuffer
via GetDirectBufferAddress
.
convertRow
is called from a thread pool (taken from here); the thread pool has 4 threads to use.
Current specs of the computer:
8GB of RAM
i5 650 @ 3.19GHz
// http://www.codeproject.com/Articles/69941/Best-Square-Root-Method-Algorithm-Function-Precisi
float fast_sqrt(const float x){
union
{
int i;
float x;
} u;
u.x = x;
u.i = (1<<29) + (u.i >> 1) - (1<<22);
return u.x;
}
// forgot where this was from; TODO: find source
double dist(unsigned char r, unsigned char g, unsigned char b, Color e2) {
int rmean = (r + e2.r ) / 2;
int rDiff = r - e2.r;
int gDiff = g - e2.g;
int bDiff = b - e2.b;
return fast_sqrt((((512+rmean)*rDiff*rDiff)>>8) + 4*gDiff*gDiff + (((767-rmean)*bDiff*bDiff)>>8));
}
char matchColor(unsigned char r, unsigned char g, unsigned char b){
int index = 0;
double best = -1;
double distance;
for (int i = 4; i < ARR_LENGTH(mapColors); ++i) {
distance = dist(r, g, b, mapColors[i]);
if (distance < best || best == -1.0f) {
best = distance;
index = i;
}
}
return (char)(index < 143 ? index : -144 + (index - 143));
}
static void convertRow(void* ptr){
param* params = (param*)ptr;
int p;
unsigned char r, g, b;
for (int y = 0; y < params->ctx->height; y++) {
p = params->x * 3 + y * params->ctx->rgbFrame->linesize[0];
r = params->ctx->rgbFrame->data[0][p];
g = params->ctx->rgbFrame->data[0][p + 1];
b = params->ctx->rgbFrame->data[0][p + 2];
params->ctx->data[(params->x / 128) + (params->ctx->width / 128) * (y / 128)][((params->x % 128) + 128 * (y % 128))] = matchColor(r, g, b);
}
latch_countdown(params->ctx->latch);
}
JNIEXPORT jboolean JNICALL Java_ga_nurupeaches_vivianmap_natives_NativeVideo_n_1readFrame(JNIEnv* env, jobject jthis, jlong ptr){
Context* ctx = (Context*)ptr;
while((ctx->read = av_read_frame(ctx->formatCtx, &(ctx->packetIn))) >= 0){
if(ctx->packetIn.stream_index == ctx->vtrackId){
avcodec_decode_video2(ctx->codecCtx, ctx->rawFrame, &(ctx->finished), &(ctx->packetIn));
if(ctx->finished){
sws_scale(ctx->sws, (const uint8_t* const*)ctx->rawFrame->data,
ctx->rawFrame->linesize, 0, ctx->codecCtx->height,
ctx->rgbFrame->data, ctx->rgbFrame->linesize);
break;
}
}
}
if(ctx->read < 0){
// bad. don't do this.
// stop doing this me.
Java_ga_nurupeaches_vivianmap_natives_NativeVideo_n_1close(env, jthis, ptr);
}
// lock count mutex so that we don't have a race condition
latch_lockCount(ctx->latch);
for(int x=0; x < ctx->width; x++){
param* params = malloc(sizeof(param));
params->ctx = ctx;
params->x = x;
threadpool_add(ctx->pool, convertRow, params, 0);
}
// unlock mutex after all tasks have been submitted
latch_unlockCount(ctx->latch);
// wait for count to hit 0
latch_wait(ctx->latch);
// reset latch
latch_reset(ctx->latch);
return true;
}
Any suggestions (especially performance improvements) would be appreciated! I'm willing to edit more code into the question if you need it; I just didn't because I didn't want it to become a needle in a haystack problem.