framerip/frame_extractor.cpp

248 lines
7.4 KiB
C++

#include "frame_extractor.h"
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
// ── RAII wrappers ────────────────────────────────────────────────────────────
/// Owns an AVFormatContext and closes it with avformat_close_input() when the
/// guard goes out of scope. `ctx` starts out null and is filled in by the user.
struct FormatContextGuard {
    AVFormatContext* ctx = nullptr;

    FormatContextGuard() = default;
    // Non-copyable: two guards holding the same pointer would close it twice.
    FormatContextGuard(const FormatContextGuard&) = delete;
    FormatContextGuard& operator=(const FormatContextGuard&) = delete;

    ~FormatContextGuard() { if (ctx) avformat_close_input(&ctx); }
};
/// Owns an AVCodecContext and releases it with avcodec_free_context() when the
/// guard goes out of scope. `ctx` starts out null and is filled in by the user.
struct CodecContextGuard {
    AVCodecContext* ctx = nullptr;

    CodecContextGuard() = default;
    // Non-copyable: two guards holding the same pointer would free it twice.
    CodecContextGuard(const CodecContextGuard&) = delete;
    CodecContextGuard& operator=(const CodecContextGuard&) = delete;

    ~CodecContextGuard() { if (ctx) avcodec_free_context(&ctx); }
};
/// Allocates an AVFrame on construction and frees it with av_frame_free() on
/// destruction. `frame` is null if av_frame_alloc() failed; callers must check.
struct FrameGuard {
    AVFrame* frame = nullptr;

    FrameGuard() : frame(av_frame_alloc()) {}
    // Non-copyable: two guards holding the same pointer would free it twice.
    FrameGuard(const FrameGuard&) = delete;
    FrameGuard& operator=(const FrameGuard&) = delete;

    ~FrameGuard() { if (frame) av_frame_free(&frame); }
};
/// Allocates an AVPacket on construction and frees it with av_packet_free() on
/// destruction. `pkt` is null if av_packet_alloc() failed; callers must check.
struct PacketGuard {
    AVPacket* pkt = nullptr;

    PacketGuard() : pkt(av_packet_alloc()) {}
    // Non-copyable: two guards holding the same pointer would free it twice.
    PacketGuard(const PacketGuard&) = delete;
    PacketGuard& operator=(const PacketGuard&) = delete;

    ~PacketGuard() { if (pkt) av_packet_free(&pkt); }
};
/// Owns a SwsContext and releases it with sws_freeContext() when the guard goes
/// out of scope. `ctx` starts out null and is filled in by the user.
struct SwsContextGuard {
    SwsContext* ctx = nullptr;

    SwsContextGuard() = default;
    // Non-copyable: two guards holding the same pointer would free it twice.
    SwsContextGuard(const SwsContextGuard&) = delete;
    SwsContextGuard& operator=(const SwsContextGuard&) = delete;

    ~SwsContextGuard() { if (ctx) sws_freeContext(ctx); }
};
// ── Impl ─────────────────────────────────────────────────────────────────────
/// Private state of FrameExtractor: the source path, the FFmpeg demuxer and
/// decoder handles, and the stream metadata cached by open().
///
/// Members are destroyed in reverse declaration order, so the decoder context
/// (`codec`) is freed before the container (`fmt`) is closed; keep that order.
struct FrameExtractor::Impl {
    std::string        path;                  // file passed to avformat_open_input()
    FormatContextGuard fmt;                   // container / demuxer handle
    CodecContextGuard  codec;                 // video decoder handle
    int                videoStreamIndex{-1};  // -1 until a video stream is selected
    int                width{0};              // decoder frame width, in pixels
    int                height{0};             // decoder frame height, in pixels
    double             duration{0.0};         // container duration, in seconds
    double             framerate{0.0};        // stream frame rate, in frames per second
};
// ── Public API ────────────────────────────────────────────────────────────────
/// Stores the path of the media file to read; nothing is opened until open().
///
/// @param path  Location of the media file; moved into the object and also
///              copied into the private Impl.
FrameExtractor::FrameExtractor(std::string path)
    : m_path(std::move(path))
{
    // Build the Impl under a unique_ptr so that it is released if copying the
    // path throws: the destructor does not run for a constructor that fails.
    auto impl = std::make_unique<Impl>();
    impl->path = m_path;
    d = impl.release();
}
/// Destroys the private Impl; its guard members release the FFmpeg contexts.
FrameExtractor::~FrameExtractor() { delete d; }
bool FrameExtractor::open()
{
// Open container
if (avformat_open_input(&d->fmt.ctx, d->path.c_str(), nullptr, nullptr) < 0) {
std::cerr << "[FrameExtractor] Cannot open file: " << d->path << "\n";
return false;
}
if (avformat_find_stream_info(d->fmt.ctx, nullptr) < 0) {
std::cerr << "[FrameExtractor] Cannot find stream info.\n";
return false;
}
// Find best video stream
const AVCodec* codec = nullptr;
d->videoStreamIndex = av_find_best_stream(
d->fmt.ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &codec, 0);
if (d->videoStreamIndex < 0 || !codec) {
std::cerr << "[FrameExtractor] No video stream found.\n";
return false;
}
AVStream* stream = d->fmt.ctx->streams[d->videoStreamIndex];
// Create and configure codec context
d->codec.ctx = avcodec_alloc_context3(codec);
if (!d->codec.ctx) {
std::cerr << "[FrameExtractor] Cannot allocate codec context.\n";
return false;
}
if (avcodec_parameters_to_context(d->codec.ctx, stream->codecpar) < 0) {
std::cerr << "[FrameExtractor] Cannot copy codec parameters.\n";
return false;
}
if (avcodec_open2(d->codec.ctx, codec, nullptr) < 0) {
std::cerr << "[FrameExtractor] Cannot open codec.\n";
return false;
}
d->width = d->codec.ctx->width;
d->height = d->codec.ctx->height;
// Duration in seconds
if (d->fmt.ctx->duration != AV_NOPTS_VALUE) {
d->duration = static_cast<double>(d->fmt.ctx->duration) / AV_TIME_BASE;
}
// Framerate in fps
AVRational fps = stream->avg_frame_rate;
d->framerate = static_cast<double>((double)fps.num / fps.den);
return true;
}
/// Decodes the selected video stream from the demuxer's current position and
/// hands every frame to `cb` as tightly packed RGB24 (width * 3 bytes per row,
/// no stride padding).
///
/// The same FrameData object is reused for every call; its `pixels` buffer is
/// overwritten by the next frame and `pts` carries the decoded frame's pts.
/// Frames whose size or pixel format differs from the values cached by open()
/// are scaled/converted to the cached width x height.
///
/// @param cb  Invoked once per decoded frame; return false to stop early.
/// @throws std::runtime_error if open() has not succeeded, if the cached frame
///         dimensions are not positive, or if an FFmpeg allocation or scaler
///         set-up fails.
void FrameExtractor::forEachFrame(FrameCallback cb)
{
    if (d->videoStreamIndex < 0 || !d->fmt.ctx || !d->codec.ctx) {
        throw std::runtime_error("FrameExtractor::open() must succeed before forEachFrame()");
    }

    const int W = d->width;
    const int H = d->height;
    if (W <= 0 || H <= 0) {
        throw std::runtime_error("FrameExtractor: invalid frame dimensions");
    }

    FrameGuard rawFrame;
    if (!rawFrame.frame) throw std::runtime_error("av_frame_alloc failed");
    PacketGuard pkt;
    if (!pkt.pkt) throw std::runtime_error("av_packet_alloc failed");

    const size_t frameBytes = static_cast<size_t>(W) * H * 3;
    FrameData fd;
    fd.width = W;
    fd.height = H;
    fd.pixels.resize(frameBytes);

    // Created lazily from the first decoded frame and rebuilt by
    // sws_getCachedContext() whenever the source geometry or format changes.
    SwsContextGuard sws;

    // Converts one decoded frame to RGB24 straight into fd.pixels, releases the
    // frame's buffers and forwards the result to the callback.
    // Returns the callback's verdict (false == stop).
    const auto deliver = [&](AVFrame* frame) -> bool {
        // sws_getCachedContext() reuses the context when the parameters match,
        // otherwise frees it and builds a new one (returning null on failure),
        // so assigning the result back keeps the guard's ownership correct.
        sws.ctx = sws_getCachedContext(
            sws.ctx,
            frame->width, frame->height, static_cast<AVPixelFormat>(frame->format),
            W, H, AV_PIX_FMT_RGB24,
            SWS_BILINEAR, nullptr, nullptr, nullptr);
        if (!sws.ctx) {
            throw std::runtime_error("sws_getContext failed");
        }

        // Restore the buffer size in case the callback moved from or resized it.
        fd.pixels.resize(frameBytes);

        // Packed destination: a single plane whose stride equals the row width.
        uint8_t* const dstData[4] = { fd.pixels.data(), nullptr, nullptr, nullptr };
        const int dstStride[4] = { W * 3, 0, 0, 0 };
        sws_scale(
            sws.ctx,
            frame->data, frame->linesize, 0, frame->height,
            dstData, dstStride);

        fd.pts = frame->pts;
        av_frame_unref(frame);  // fd now holds everything the callback needs
        return cb(fd);
    };

    bool keepGoing = true;
    while (keepGoing && av_read_frame(d->fmt.ctx, pkt.pkt) >= 0) {
        if (pkt.pkt->stream_index != d->videoStreamIndex) {
            av_packet_unref(pkt.pkt);
            continue;
        }
        const int sendResult = avcodec_send_packet(d->codec.ctx, pkt.pkt);
        av_packet_unref(pkt.pkt);
        if (sendResult < 0) {
            continue;  // skip packets the decoder rejects
        }

        // Drain every frame the decoder has ready for this packet.
        int ret = 0;
        while (keepGoing &&
               (ret = avcodec_receive_frame(d->codec.ctx, rawFrame.frame)) >= 0) {
            keepGoing = deliver(rawFrame.frame);
        }
        if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) {
            break;  // real decode error
        }
    }

    // Flush decoder: a null packet switches it to draining mode so that frames
    // still buffered inside it are emitted.
    if (keepGoing) {
        avcodec_send_packet(d->codec.ctx, nullptr);
        while (avcodec_receive_frame(d->codec.ctx, rawFrame.frame) >= 0) {
            if (!deliver(rawFrame.frame)) break;
        }
    }
}
/// Frame width in pixels as cached by open(); 0 until open() has stored one.
int FrameExtractor::width() const
{
    return d->width;
}
/// Frame height in pixels as cached by open(); 0 until open() has stored one.
int FrameExtractor::height() const
{
    return d->height;
}
/// Container duration in seconds as cached by open(); 0.0 if none was stored.
double FrameExtractor::durationSeconds() const
{
    return d->duration;
}
/// Video frame rate in frames per second as cached by open(); 0.0 until set.
double FrameExtractor::framerate() const
{
    return d->framerate;
}