
A full-process guide to implementing video decoding with FFmpeg on Android

1. Architectural design

1.1 Overall architecture

Adopt a three-layer architecture design:

• Application layer: provides the user interface and frame display

• Business logic layer: manages the decoding workflow and state

• Native layer: FFmpeg core decoding implementation

1.2 State management scheme

Use static integer constants instead of an enum class; plain ints avoid the per-value object overhead of enums and are trivial to pass across the JNI boundary:

public class DecodeState {
    public static final int STATE_IDLE = 0;
    public static final int STATE_PREPARING = 1;
    public static final int STATE_READY = 2;
    public static final int STATE_DECODING = 3;
    public static final int STATE_PAUSED = 4;
    public static final int STATE_STOPPED = 5;
    public static final int STATE_ERROR = 6;
}

2. Core class implementation

2.1 Video frame data encapsulation class

import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.ImageFormat;
import android.graphics.Matrix;
import android.graphics.Rect;
import android.graphics.YuvImage;

import java.io.ByteArrayOutputStream;

public class VideoFrame {
    private final byte[] videoData;
    private final int width;
    private final int height;
    private final long pts;
    private final int format;
    private final int rotation;
    
    public VideoFrame(byte[] videoData, int width, int height, long pts, int format, int rotation) {
        this.videoData = videoData;
        this.width = width;
        this.height = height;
        this.pts = pts;
        this.format = format;
        this.rotation = rotation;
    }
    
    // Getter methods
    public byte[] getVideoData() {
        return videoData;
    }
    
    public int getWidth() {
        return width;
    }
    
    public int getHeight() {
        return height;
    }
    
    public long getPts() {
        return pts;
    }
    
    public int getFormat() {
        return format;
    }
    
    public int getRotation() {
        return rotation;
    }
    
    // Convert to Bitmap. Note: this path assumes NV21 data; the native code in
    // this guide outputs RGB24, so in practice branch on the format field.
    public Bitmap toBitmap() {
        YuvImage yuvImage = new YuvImage(videoData, ImageFormat.NV21, width, height, null);
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        yuvImage.compressToJpeg(new Rect(0, 0, width, height), 100, os);
        byte[] jpegByteArray = os.toByteArray();
        Bitmap bitmap = BitmapFactory.decodeByteArray(jpegByteArray, 0, jpegByteArray.length);
        
        // Handle rotation
        if (rotation != 0) {
            Matrix matrix = new Matrix();
            matrix.postRotate(rotation);
            bitmap = Bitmap.createBitmap(bitmap, 0, 0, 
                                         bitmap.getWidth(), bitmap.getHeight(), 
                                         matrix, true);
        }
        
        return bitmap;
    }
}

2.2 Video decoder wrapper class

import android.os.Handler;
import android.os.Looper;

public class VideoDecoder {
    // Decode state constants
    public static final int STATE_IDLE = 0;
    public static final int STATE_PREPARING = 1;
    public static final int STATE_READY = 2;
    public static final int STATE_DECODING = 3;
    public static final int STATE_PAUSED = 4;
    public static final int STATE_STOPPED = 5;
    public static final int STATE_ERROR = 6;
    
    // Error code constants
    public static final int ERROR_CODE_FILE_NOT_FOUND = 1001;
    public static final int ERROR_CODE_UNSUPPORTED_FORMAT = 1002;
    public static final int ERROR_CODE_DECODE_FAILED = 1003;
    
    private volatile int currentState = STATE_IDLE;
    private long nativeHandle;
    private Handler mainHandler;
    private volatile DecodeListener listener; // set in startDecoding(), used by the callbacks below
    
    public interface DecodeListener {
        void onFrameDecoded(VideoFrame frame);
        void onDecodeFinished();
        void onErrorOccurred(int errorCode, String message);
        void onStateChanged(int newState);
    }
    
    public VideoDecoder() {
        nativeHandle = nativeInit();
        mainHandler = new Handler(Looper.getMainLooper());
    }
    
    public void prepare(String filePath) {
        if (currentState != STATE_IDLE) {
            notifyError(ERROR_CODE_DECODE_FAILED, "Decoder is not in idle state");
            return;
        }
        
        setState(STATE_PREPARING);
        
        new Thread(() -> {
            boolean success = nativePrepare(nativeHandle, filePath);
            if (success) {
                setState(STATE_READY);
            } else {
                setState(STATE_ERROR);
                notifyError(ERROR_CODE_FILE_NOT_FOUND, "Failed to prepare decoder");
            }
        }).start();
    }
    
    public void startDecoding(DecodeListener listener) {
        this.listener = listener; // keep for state and error callbacks
        
        if (currentState != STATE_READY && currentState != STATE_PAUSED) {
            notifyError(ERROR_CODE_DECODE_FAILED, "Decoder is not ready");
            return;
        }
        
        setState(STATE_DECODING);
        
        new Thread(() -> {
            nativeStartDecoding(nativeHandle, listener);
            setState(STATE_STOPPED);
        }).start();
    }
    
    public void pause() {
        if (currentState == STATE_DECODING) {
            setState(STATE_PAUSED);
            nativePause(nativeHandle);
        }
    }
    
    public void resume() {
        if (currentState == STATE_PAUSED) {
            setState(STATE_DECODING);
            nativeResume(nativeHandle);
        }
    }
    
    public void stop() {
        setState(STATE_STOPPED);
        nativeStop(nativeHandle);
    }
    
    public void release() {
        setState(STATE_STOPPED);
        nativeRelease(nativeHandle);
        nativeHandle = 0;
    }
    
    public int getCurrentState() {
        return currentState;
    }
    
    private void setState(int newState) {
        currentState = newState;
        mainHandler.post(() -> {
            if (listener != null) {
                listener.onStateChanged(newState);
            }
        });
    }
    
    private void notifyError(int errorCode, String message) {
        mainHandler.post(() -> {
            if (listener != null) {
                listener.onErrorOccurred(errorCode, message);
            }
        });
    }
    
    // Native methods
    private native long nativeInit();
    private native boolean nativePrepare(long handle, String filePath);
    private native void nativeStartDecoding(long handle, DecodeListener listener);
    private native void nativePause(long handle);
    private native void nativeResume(long handle);
    private native void nativeStop(long handle);
    private native void nativeRelease(long handle);
    
    static {
        ("avcodec");
        ("avformat");
        ("avutil");
        ("swscale");
        ("ffmpeg-wrapper");
    }
}

3. Native layer implementation

3.1 Context structure

#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>

typedef struct {
    AVFormatContext *format_ctx;
    AVCodecContext *codec_ctx;
    int video_stream_idx;
    struct SwsContext *sws_ctx;
    volatile int is_decoding;
    volatile int is_paused;
    int video_width;
    int video_height;
    int rotation;
} VideoDecodeContext;

3.2 JNI interface implementation

// Initialize the decoder
JNIEXPORT jlong JNICALL
Java_com_example_VideoDecoder_nativeInit(JNIEnv *env, jobject thiz) {
    VideoDecodeContext *ctx = (VideoDecodeContext *)malloc(sizeof(VideoDecodeContext));
    memset(ctx, 0, sizeof(VideoDecodeContext));
    ctx->is_decoding = 0;
    ctx->is_paused = 0;
    ctx->rotation = 0;
    return (jlong)ctx;
}

// Prepare the decoder
JNIEXPORT jboolean JNICALL
Java_com_example_VideoDecoder_nativePrepare(JNIEnv *env, jobject thiz, 
                                          jlong handle, jstring file_path) {
    VideoDecodeContext *ctx = (VideoDecodeContext *)handle;
    const char *path = (*env)->GetStringUTFChars(env, file_path, NULL);
    
    // Open the media file
    if (avformat_open_input(&ctx->format_ctx, path, NULL, NULL) != 0) {
        LOGE("Could not open file: %s", path);
        (*env)->ReleaseStringUTFChars(env, file_path, path);
        return JNI_FALSE;
    }
    
    // Retrieve stream information
    if (avformat_find_stream_info(ctx->format_ctx, NULL) < 0) {
        LOGE("Could not find stream information");
        (*env)->ReleaseStringUTFChars(env, file_path, path);
        avformat_close_input(&ctx->format_ctx);
        return JNI_FALSE;
    }
    
    // Find the video stream
    ctx->video_stream_idx = -1;
    for (int i = 0; i < ctx->format_ctx->nb_streams; i++) {
        if (ctx->format_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            ctx->video_stream_idx = i;
            
            // Read the video rotation from stream metadata
            // (older FFmpeg; newer versions expose it as display-matrix side data)
            AVDictionaryEntry *rotate_tag = av_dict_get(ctx->format_ctx->streams[i]->metadata, 
                                                        "rotate", NULL, 0);
            if (rotate_tag && rotate_tag->value) {
                ctx->rotation = atoi(rotate_tag->value);
            }
            break;
        }
    }
    
    // Check whether a video stream was found
    if (ctx->video_stream_idx == -1) {
        LOGE("Could not find video stream");
        (*env)->ReleaseStringUTFChars(env, file_path, path);
        avformat_close_input(&ctx->format_ctx);
        return JNI_FALSE;
    }
    
    // Get the decoder for the stream's codec
    AVCodecParameters *codec_params = ctx->format_ctx->streams[ctx->video_stream_idx]->codecpar;
    const AVCodec *decoder = avcodec_find_decoder(codec_params->codec_id);
    if (!decoder) {
        LOGE("Unsupported codec");
        (*env)->ReleaseStringUTFChars(env, file_path, path);
        avformat_close_input(&ctx->format_ctx);
        return JNI_FALSE;
    }
    
    // Create the decoding context
    ctx->codec_ctx = avcodec_alloc_context3(decoder);
    avcodec_parameters_to_context(ctx->codec_ctx, codec_params);
    
    // Open the decoder
    if (avcodec_open2(ctx->codec_ctx, decoder, NULL) < 0) {
        LOGE("Could not open codec");
        (*env)->ReleaseStringUTFChars(env, file_path, path);
        avcodec_free_context(&ctx->codec_ctx);
        avformat_close_input(&ctx->format_ctx);
        return JNI_FALSE;
    }
    
    // Save the video dimensions
    ctx->video_width = ctx->codec_ctx->width;
    ctx->video_height = ctx->codec_ctx->height;
    
    (*env)->ReleaseStringUTFChars(env, file_path, path);
    return JNI_TRUE;
}

3.3 Core decoding logic

// Start decoding
JNIEXPORT void JNICALL
Java_com_example_VideoDecoder_nativeStartDecoding(JNIEnv *env, jobject thiz, 
                                                jlong handle, jobject listener) {
    VideoDecodeContext *ctx = (VideoDecodeContext *)handle;
    ctx->is_decoding = 1;
    ctx->is_paused = 0;
    
    // Look up the Java callback class and methods
    jclass listener_class = (*env)->GetObjectClass(env, listener);
    jmethodID on_frame_method = (*env)->GetMethodID(env, listener_class, 
                                                  "onFrameDecoded", 
                                                  "(Lcom/example/VideoFrame;)V");
    jmethodID on_finish_method = (*env)->GetMethodID(env, listener_class, 
                                                   "onDecodeFinished", "()V");
    jmethodID on_error_method = (*env)->GetMethodID(env, listener_class, 
                                                  "onErrorOccurred", "(ILjava/lang/String;)V");
    
    // Allocate frames and a packet
    AVFrame *frame = av_frame_alloc();
    AVFrame *rgb_frame = av_frame_alloc();
    AVPacket *packet = av_packet_alloc();
    uint8_t *rgb_buffer = NULL; // declared up front so the error path can free it safely
    
    // Prepare the image conversion context (convert to RGB24)
    ctx->sws_ctx = sws_getContext(
        ctx->video_width, ctx->video_height, ctx->codec_ctx->pix_fmt,
        ctx->video_width, ctx->video_height, AV_PIX_FMT_RGB24,
        SWS_BILINEAR, NULL, NULL, NULL);
    
    if (!ctx->sws_ctx) {
        (*env)->CallVoidMethod(env, listener, on_error_method, 
                               1003 /* ERROR_CODE_DECODE_FAILED */,
                               (*env)->NewStringUTF(env, "Could not initialize sws context"));
        goto end;
    }
    
    // Allocate the RGB buffer
    int rgb_buffer_size = av_image_get_buffer_size(AV_PIX_FMT_RGB24, 
                                                   ctx->video_width, 
                                                   ctx->video_height, 1);
    rgb_buffer = (uint8_t *)av_malloc(rgb_buffer_size);
    av_image_fill_arrays(rgb_frame->data, rgb_frame->linesize, rgb_buffer,
                         AV_PIX_FMT_RGB24, ctx->video_width, 
                         ctx->video_height, 1);
    
    // Look up the VideoFrame class and constructor once, outside the decoding loop.
    // Note the signature: (byte[], int, int, long, int, int) -> "([BIIJII)V"
    jclass frame_class = (*env)->FindClass(env, "com/example/VideoFrame");
    jmethodID frame_ctor = (*env)->GetMethodID(env, frame_class, "<init>", "([BIIJII)V");
    
    // Decoding loop
    while (ctx->is_decoding && av_read_frame(ctx->format_ctx, packet) >= 0) {
        if (packet->stream_index == ctx->video_stream_idx) {
            // Send the packet to the decoder
            if (avcodec_send_packet(ctx->codec_ctx, packet) == 0) {
                // Receive decoded frames
                while (avcodec_receive_frame(ctx->codec_ctx, frame) == 0) {
                    if (!ctx->is_decoding) break;
                    
                    // Wait here while paused
                    while (ctx->is_paused && ctx->is_decoding) {
                        usleep(10000); // 10 ms
                    }
                    
                    if (!ctx->is_decoding) break;
                    
                    // Convert the pixel format
                    sws_scale(ctx->sws_ctx, (const uint8_t *const *)frame->data,
                              frame->linesize, 0, ctx->video_height,
                              rgb_frame->data, rgb_frame->linesize);
                    
                    // Copy the RGB data into a Java byte array
                    jbyteArray rgb_array = (*env)->NewByteArray(env, rgb_buffer_size);
                    (*env)->SetByteArrayRegion(env, rgb_array, 0, rgb_buffer_size, 
                                               (jbyte *)rgb_buffer);
                    
                    // Create the Java VideoFrame object
                    jobject video_frame = (*env)->NewObject(env, frame_class, frame_ctor,
                                                            rgb_array, 
                                                            ctx->video_width,
                                                            ctx->video_height,
                                                            (jlong)frame->pts,
                                                            AV_PIX_FMT_RGB24,
                                                            ctx->rotation);
                    
                    // Call back into the Java layer
                    (*env)->CallVoidMethod(env, listener, on_frame_method, video_frame);
                    
                    // Delete local references so the local-reference table does not overflow
                    (*env)->DeleteLocalRef(env, video_frame);
                    (*env)->DeleteLocalRef(env, rgb_array);
                }
            }
        }
        av_packet_unref(packet);
    }
    
    // Completion callback: only fires if decoding ran to the end of the file
    if (ctx->is_decoding) {
        (*env)->CallVoidMethod(env, listener, on_finish_method);
    }
    
end:
    // Free resources
    if (rgb_buffer) av_free(rgb_buffer);
    if (ctx->sws_ctx) {
        sws_freeContext(ctx->sws_ctx);
        ctx->sws_ctx = NULL;
    }
    av_frame_free(&frame);
    av_frame_free(&rgb_frame);
    av_packet_free(&packet);
}

4. Usage example

public class VideoPlayerActivity extends AppCompatActivity 
        implements VideoDecoder.DecodeListener {
    
    private VideoDecoder videoDecoder;
    private ImageView videoView;
    private Button btnPlay, btnPause, btnStop;
    
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_video_player);
        
        videoView = findViewById(R.id.video_view);
        btnPlay = findViewById(R.id.btn_play);
        btnPause = findViewById(R.id.btn_pause);
        btnStop = findViewById(R.id.btn_stop);
        
        videoDecoder = new VideoDecoder();
        
        // Path of the video file to decode
        String videoPath = getExternalFilesDir(null) + "/test.mp4";
        
        // Set up the button click listeners
        btnPlay.setOnClickListener(v -> {
            if (videoDecoder.getCurrentState() == VideoDecoder.STATE_READY || 
                videoDecoder.getCurrentState() == VideoDecoder.STATE_PAUSED) {
                videoDecoder.startDecoding(this);
            } else if (videoDecoder.getCurrentState() == VideoDecoder.STATE_IDLE) {
                videoDecoder.prepare(videoPath);
            }
        });
        
        btnPause.setOnClickListener(v -> {
            if (videoDecoder.getCurrentState() == VideoDecoder.STATE_DECODING) {
                videoDecoder.pause();
            }
        });
        
        btnStop.setOnClickListener(v -> {
            if (videoDecoder.getCurrentState() != VideoDecoder.STATE_IDLE && 
                videoDecoder.getCurrentState() != VideoDecoder.STATE_STOPPED) {
                videoDecoder.stop();
            }
        });
    }
    
    @Override
    public void onFrameDecoded(VideoFrame frame) {
        runOnUiThread(() -> {
            Bitmap bitmap = frame.toBitmap();
            videoView.setImageBitmap(bitmap);
        });
    }
    
    @Override
    public void onDecodeFinished() {
        runOnUiThread(() -> {
            (this, "Decoding is complete", Toast.LENGTH_SHORT).show();
            (null);
        });
    }
    
    @Override
    public void onErrorOccurred(int errorCode, String message) {
        runOnUiThread(() -> {
            String errorMsg = "mistake(" + errorCode + "): " + message;
            (this, errorMsg, Toast.LENGTH_LONG).show();
        });
    }
    
    @Override
    public void onStateChanged(int newState) {
        runOnUiThread(() -> updateUI(newState));
    }
    
    private void updateUI(int state) {
        btnPlay.setEnabled(state == VideoDecoder.STATE_READY || 
                           state == VideoDecoder.STATE_PAUSED ||
                           state == VideoDecoder.STATE_IDLE);
        
        btnPause.setEnabled(state == VideoDecoder.STATE_DECODING);
        btnStop.setEnabled(state == VideoDecoder.STATE_DECODING || 
                           state == VideoDecoder.STATE_PAUSED);
    }
    
    @Override
    protected void onDestroy() {
        videoDecoder.release();
        super.onDestroy();
    }
}

5. Performance optimization suggestions

Render directly to a Surface (a Java-side sketch follows the list):

• Render YUV data directly through ANativeWindow, avoiding per-frame format conversion

• Cuts down memory copies and the cost of creating a Bitmap for every frame
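
As a rough illustration of the wiring, here is a minimal Java-side sketch. It assumes a hypothetical setSurface() method added to VideoDecoder, whose native counterpart would obtain an ANativeWindow via ANativeWindow_fromSurface() and draw decoded frames straight into it:

import android.view.SurfaceHolder;
import android.view.SurfaceView;

public class VideoSurfaceBinder implements SurfaceHolder.Callback {
    private final VideoDecoder decoder;

    public VideoSurfaceBinder(VideoDecoder decoder, SurfaceView surfaceView) {
        this.decoder = decoder;
        surfaceView.getHolder().addCallback(this);
    }

    @Override
    public void surfaceCreated(SurfaceHolder holder) {
        // setSurface() is an assumed extension of VideoDecoder
        decoder.setSurface(holder.getSurface());
    }

    @Override
    public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
        // No-op: the native renderer can query the current buffer size
    }

    @Override
    public void surfaceDestroyed(SurfaceHolder holder) {
        decoder.setSurface(null); // the native side must release the ANativeWindow
    }
}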

Prefer hardware decoding:

// In nativePrepare, try a hardware decoder first.
// isHardwareDecodeSupported() is a helper you implement yourself
// (see the Java-side sketch below for one way to feed it).
const AVCodec *decoder = NULL;
if (isHardwareDecodeSupported(codec_id)) {
    // FFmpeg's MediaCodec-backed H.264 decoder; requires a build with
    // --enable-jni and --enable-mediacodec
    decoder = avcodec_find_decoder_by_name("h264_mediacodec");
}
if (!decoder) {
    decoder = avcodec_find_decoder(codec_id);
}
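
One way to implement such a check is on the Java side, passing the result down to native code before prepare(). A sketch using the platform MediaCodecList API (API 21+); the class name is illustrative:

import android.media.MediaCodecInfo;
import android.media.MediaCodecList;

public final class HardwareCodecCheck {
    // Returns true if the device exposes a decoder for the given MIME type,
    // e.g. "video/avc" for H.264. The result can inform the native
    // isHardwareDecodeSupported() decision.
    public static boolean hasDecoder(String mimeType) {
        MediaCodecList list = new MediaCodecList(MediaCodecList.REGULAR_CODECS);
        for (MediaCodecInfo info : list.getCodecInfos()) {
            if (info.isEncoder()) continue;
            for (String type : info.getSupportedTypes()) {
                if (type.equalsIgnoreCase(mimeType)) {
                    return true;
                }
            }
        }
        return false;
    }
}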

Frame buffer queue optimization (a sketch follows the list):

• Implement a producer-consumer model between the decoding and rendering threads

• Set a reasonable queue size (3-5 frames)

• Use a frame-drop strategy to handle desynchronization
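
A minimal producer-consumer sketch for a bounded frame queue (FrameQueue is an illustrative name, not part of the classes above). When the queue is full, the oldest frame is dropped so decoding never blocks rendering for long:

import java.util.concurrent.ArrayBlockingQueue;

public class FrameQueue {
    private final ArrayBlockingQueue<VideoFrame> queue;

    public FrameQueue(int capacity) {      // 3-5 frames is a reasonable capacity
        queue = new ArrayBlockingQueue<>(capacity);
    }

    // Producer side (decode thread): drop the oldest frame if the queue is full
    public void push(VideoFrame frame) {
        while (!queue.offer(frame)) {
            queue.poll();                  // frame-drop strategy
        }
    }

    // Consumer side (render thread): blocks until a frame is available
    public VideoFrame take() throws InterruptedException {
        return queue.take();
    }
}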

Multithreaded processing (a sketch follows the list):

• Separate the decoding and rendering threads

• Use a thread pool for time-consuming operations
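
Building on the FrameQueue sketch above, decoding and rendering can run on separate threads, with only the final draw posted to the main thread (DecodePipeline is an illustrative name):

import android.graphics.Bitmap;
import android.os.Handler;
import android.os.Looper;
import android.widget.ImageView;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class DecodePipeline {
    private final ExecutorService renderThread = Executors.newSingleThreadExecutor();
    private final Handler mainHandler = new Handler(Looper.getMainLooper());

    // Drain the frame queue on a worker thread; only setImageBitmap()
    // touches the UI thread.
    public void startRendering(FrameQueue queue, ImageView view) {
        renderThread.execute(() -> {
            try {
                while (!Thread.currentThread().isInterrupted()) {
                    VideoFrame frame = queue.take();
                    Bitmap bitmap = frame.toBitmap(); // conversion stays off the UI thread
                    mainHandler.post(() -> view.setImageBitmap(bitmap));
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });
    }

    public void stop() {
        renderThread.shutdownNow();
    }
}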

Memory reuse:

// Reuse a single AVPacket (and similarly AVFrame) instead of allocating per iteration
static AVPacket *reuse_packet = NULL;
if (!reuse_packet) {
    reuse_packet = av_packet_alloc();
} else {
    av_packet_unref(reuse_packet);
}

Precise frame rate control:

// Pace decoding according to the stream's frame rate
AVRational frame_rate = ctx->format_ctx->streams[ctx->video_stream_idx]->avg_frame_rate;
double frame_delay = av_q2d(av_inv_q(frame_rate)) * 1000000; // microseconds per frame
int64_t last_frame_time = av_gettime();
while (decoding) {
    // ... decoding logic ...
    
    int64_t current_time = av_gettime();
    int64_t elapsed = current_time - last_frame_time;
    if (elapsed < (int64_t)frame_delay) {
        usleep((useconds_t)(frame_delay - elapsed));
    }
    last_frame_time = av_gettime();
}

Low-power optimization (a lifecycle sketch follows the list):

• Adjust the decoding strategy according to device temperature

• Reduce the frame rate, or pause decoding entirely, while in the background
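
The background case can be covered directly with Activity lifecycle callbacks, reusing the pause()/resume() methods of VideoDecoder; for example, added to VideoPlayerActivity:

@Override
protected void onStop() {
    super.onStop();
    // Entering the background: stop spending CPU (and battery) on decoding
    if (videoDecoder.getCurrentState() == VideoDecoder.STATE_DECODING) {
        videoDecoder.pause();
    }
}

@Override
protected void onStart() {
    super.onStart();
    if (videoDecoder.getCurrentState() == VideoDecoder.STATE_PAUSED) {
        videoDecoder.resume();
    }
}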

6. Compatibility handling

API version adaptation:

private static boolean isSurfaceTextureSupported() {
    return Build.VERSION.SDK_INT >= Build.VERSION_CODES.ICE_CREAM_SANDWICH;
}

Permission handling:

private boolean checkStoragePermission() {
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
        return checkSelfPermission(Manifest.permission.READ_EXTERNAL_STORAGE) 
               == PackageManager.PERMISSION_GRANTED;
    }
    return true;
}
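
If the check fails, the permission still has to be requested at runtime; a minimal sketch (REQUEST_CODE_STORAGE is an arbitrary request code):

private static final int REQUEST_CODE_STORAGE = 100;

private void requestStoragePermissionIfNeeded() {
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M && !checkStoragePermission()) {
        requestPermissions(new String[] { Manifest.permission.READ_EXTERNAL_STORAGE },
                           REQUEST_CODE_STORAGE);
    }
}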

ABI compatibility:

android {
    defaultConfig {
        ndk {
            abiFilters 'armeabi-v7a', 'arm64-v8a', 'x86', 'x86_64'
        }
    }
}

7. Error handling and logging

Robust error handling:

public void onErrorOccurred(int errorCode, String message) {
    switch (errorCode) {
        case VideoDecoder.ERROR_CODE_FILE_NOT_FOUND:
            // Handle file-not-found errors
            break;
        case VideoDecoder.ERROR_CODE_UNSUPPORTED_FORMAT:
            // Handle unsupported-format errors
            break;
        default:
            // Handle unknown errors
            break;
    }
}

Log system:

#include <android/log.h>
#include <stdarg.h>

#define LOG_LEVEL_VERBOSE 1
#define LOG_LEVEL_DEBUG   2
#define LOG_LEVEL_INFO    3
#define LOG_LEVEL_WARN    4
#define LOG_LEVEL_ERROR   5

// Compile-time threshold; messages below this level are dropped
#define CURRENT_LOG_LEVEL LOG_LEVEL_DEBUG

void log_print(int level, const char *tag, const char *fmt, ...) {
    if (level >= CURRENT_LOG_LEVEL) {
        va_list args;
        va_start(args, fmt);
        // These custom levels are offset by one from android_LogPriority
        // (ANDROID_LOG_VERBOSE == 2), hence level + 1
        __android_log_vprint(level + 1, tag, fmt, args);
        va_end(args);
    }
}

8. Extended features

Video information acquisition:

public class VideoInfo {
    public int width;
    public int height;
    public long duration;
    public float frameRate;
    public int rotation;
}

// Added to VideoDecoder
public VideoInfo getVideoInfo() {
    return nativeGetVideoInfo(nativeHandle);
}
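
The matching native declaration and a usage example; the JNI implementation is not shown here, but it would populate the fields from format_ctx (for instance, the duration from format_ctx->duration). Treating duration as milliseconds is an assumption:

// Declared alongside the other native methods in VideoDecoder
private native VideoInfo nativeGetVideoInfo(long handle);

// Usage:
VideoInfo info = videoDecoder.getVideoInfo();
Log.d("VideoInfo", info.width + "x" + info.height
        + " @ " + info.frameRate + " fps, rotation " + info.rotation
        + ", duration " + info.duration + " ms");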

Video frame capture:

// nativeCaptureFrame is an additional native method (not shown) that returns
// the most recently decoded frame as a Bitmap
public Bitmap captureFrame() {
    if (currentState == STATE_DECODING || currentState == STATE_PAUSED) {
        return nativeCaptureFrame(nativeHandle);
    }
    return null;
}

Video scaling control:

// Scale in the native layer: create the SwsContext with the target output
// dimensions, then sws_scale writes into a frame allocated at that size
sws_scale(ctx->sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 
         0, ctx->video_height, 
         scaled_frame->data, scaled_frame->linesize);

9. Testing suggestions

Unit Tests:

@Test
public void testDecoderStates() {
    VideoDecoder decoder = new VideoDecoder();
    assertEquals(VideoDecoder.STATE_IDLE, decoder.getCurrentState());
    
    decoder.prepare("test.mp4");
    // prepare() runs on a background thread: wait for it to finish before asserting
    assertEquals(VideoDecoder.STATE_READY, decoder.getCurrentState());
}
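
Because prepare() runs on a background thread, the assertion above is racy. A more reliable variant polls the state with a deadline, using only the public API:

@Test
public void testPrepareReachesReady() throws InterruptedException {
    VideoDecoder decoder = new VideoDecoder();
    decoder.prepare("test.mp4");

    // Poll until the background prepare() leaves the PREPARING state (5 s deadline)
    long deadline = System.currentTimeMillis() + 5000;
    while (decoder.getCurrentState() == VideoDecoder.STATE_PREPARING
            && System.currentTimeMillis() < deadline) {
        Thread.sleep(50);
    }

    assertEquals(VideoDecoder.STATE_READY, decoder.getCurrentState());
    decoder.release();
}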

Performance Test:

long startTime = System.currentTimeMillis();
// ... perform the decoding operation ...
long endTime = System.currentTimeMillis();
Log.d("Performance", "Decoding took " + (endTime - startTime) + " ms");

Memory Leak Detection:

• Use Android Profiler to monitor memory usage

• Repeatedly create and release decoders and watch for memory growth

10. Summary

The Android FFmpeg video decoding solution implemented in this article has the following characteristics:

  • High performance: efficient decoding through native-layer optimization and careful memory management
  • High compatibility: avoids enum classes and supports a wide range of Android devices
  • Extensibility: the modular design makes it easy to add new features
  • Stability: complete state management and error handling
  • Ease of use: a clear API and thorough documentation

Developers can extend this basic framework to suit their needs, for example by adding audio decoding or video filters, to build a more complete media playback solution.

This concludes the full-process guide to implementing video decoding with FFmpeg on Android. For more on Android FFmpeg video decoding, please search my earlier articles or browse the related articles below. I hope you will continue to support me!