// 248 lines · 7.4 KiB · C++
#include "frame_extractor.h"
|
|
|
|
extern "C" {
|
|
#include <libavcodec/avcodec.h>
|
|
#include <libavformat/avformat.h>
|
|
#include <libavutil/imgutils.h>
|
|
#include <libswscale/swscale.h>
|
|
}
|
|
|
|
#include <stdexcept>
|
|
#include <iostream>
|
|
|
|
// ── RAII wrappers ────────────────────────────────────────────────────────────
|
|
|
|
struct FormatContextGuard {
|
|
AVFormatContext* ctx = nullptr;
|
|
~FormatContextGuard() { if (ctx) avformat_close_input(&ctx); }
|
|
};
|
|
|
|
struct CodecContextGuard {
|
|
AVCodecContext* ctx = nullptr;
|
|
~CodecContextGuard() { if (ctx) avcodec_free_context(&ctx); }
|
|
};
|
|
|
|
struct FrameGuard {
|
|
AVFrame* frame = nullptr;
|
|
FrameGuard() : frame(av_frame_alloc()) {}
|
|
~FrameGuard() { if (frame) av_frame_free(&frame); }
|
|
};
|
|
|
|
struct PacketGuard {
|
|
AVPacket* pkt = nullptr;
|
|
PacketGuard() : pkt(av_packet_alloc()) {}
|
|
~PacketGuard() { if (pkt) av_packet_free(&pkt); }
|
|
};
|
|
|
|
struct SwsContextGuard {
|
|
SwsContext* ctx = nullptr;
|
|
~SwsContextGuard() { if (ctx) sws_freeContext(ctx); }
|
|
};
|
|
|
|
// ── Impl ─────────────────────────────────────────────────────────────────────
|
|
|
|
/// Private state of FrameExtractor, reached through the `d` pointer.
struct FrameExtractor::Impl {
    // Path handed to avformat_open_input() by open().
    std::string path;

    // Members are destroyed in reverse declaration order, so the codec context
    // is released before the container is closed. Keep this order.
    FormatContextGuard fmt;
    CodecContextGuard codec;

    // Index of the selected video stream; negative until open() picks one.
    int videoStreamIndex{-1};

    // Stream properties cached by open().
    int width{0};
    int height{0};
    double duration{0.0};   // seconds
    double framerate{0.0};  // frames per second
};
|
|
|
|
// ── Public API ────────────────────────────────────────────────────────────────
|
|
|
|
/// Stores the media path and creates the private state.
/// No file is touched here; call open() to actually open the media.
FrameExtractor::FrameExtractor(std::string path)
    : m_path(std::move(path))
{
    Impl* const state = new Impl();
    state->path = m_path;  // Impl keeps its own copy of the path
    d = state;
}
|
|
|
|
/// Destroys the private state. Impl's guard members release the codec context
/// and close the container as part of that destruction.
FrameExtractor::~FrameExtractor()
{
    delete d;
}
|
|
|
|
bool FrameExtractor::open()
|
|
{
|
|
// Open container
|
|
if (avformat_open_input(&d->fmt.ctx, d->path.c_str(), nullptr, nullptr) < 0) {
|
|
std::cerr << "[FrameExtractor] Cannot open file: " << d->path << "\n";
|
|
return false;
|
|
}
|
|
|
|
if (avformat_find_stream_info(d->fmt.ctx, nullptr) < 0) {
|
|
std::cerr << "[FrameExtractor] Cannot find stream info.\n";
|
|
return false;
|
|
}
|
|
|
|
// Find best video stream
|
|
const AVCodec* codec = nullptr;
|
|
d->videoStreamIndex = av_find_best_stream(
|
|
d->fmt.ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &codec, 0);
|
|
|
|
if (d->videoStreamIndex < 0 || !codec) {
|
|
std::cerr << "[FrameExtractor] No video stream found.\n";
|
|
return false;
|
|
}
|
|
|
|
AVStream* stream = d->fmt.ctx->streams[d->videoStreamIndex];
|
|
|
|
// Create and configure codec context
|
|
d->codec.ctx = avcodec_alloc_context3(codec);
|
|
if (!d->codec.ctx) {
|
|
std::cerr << "[FrameExtractor] Cannot allocate codec context.\n";
|
|
return false;
|
|
}
|
|
|
|
if (avcodec_parameters_to_context(d->codec.ctx, stream->codecpar) < 0) {
|
|
std::cerr << "[FrameExtractor] Cannot copy codec parameters.\n";
|
|
return false;
|
|
}
|
|
|
|
if (avcodec_open2(d->codec.ctx, codec, nullptr) < 0) {
|
|
std::cerr << "[FrameExtractor] Cannot open codec.\n";
|
|
return false;
|
|
}
|
|
|
|
d->width = d->codec.ctx->width;
|
|
d->height = d->codec.ctx->height;
|
|
|
|
// Duration in seconds
|
|
if (d->fmt.ctx->duration != AV_NOPTS_VALUE) {
|
|
d->duration = static_cast<double>(d->fmt.ctx->duration) / AV_TIME_BASE;
|
|
}
|
|
|
|
// Framerate in fps
|
|
|
|
AVRational fps = stream->avg_frame_rate;
|
|
d->framerate = static_cast<double>((double)fps.num / fps.den);
|
|
|
|
return true;
|
|
}
|
|
|
|
/// Decodes the selected video stream and hands each frame to `cb` as packed
/// RGB24 (width * height * 3 bytes, no row padding).
///
/// Reading starts at the demuxer's current position; this function does not
/// seek. The same FrameData object is reused for every call, so its pixel
/// buffer is overwritten by the next frame. Iteration stops as soon as `cb`
/// returns false. Once the container is exhausted the decoder is drained so
/// buffered frames are delivered as well.
///
/// Packets the decoder rejects are skipped. A receive error other than
/// "need more input" / "end of stream" stops reading; the drain step still
/// runs afterwards.
///
/// Every decoded frame is converted with a scaler matched to that frame's own
/// size and pixel format, so streams whose format is only known after the
/// first decode, or that change mid-stream, are converted to the output size
/// instead of being read out of bounds.
///
/// @param cb  Invoked once per decoded frame; return false to stop.
/// @throws std::runtime_error if open() has not succeeded, the cached frame
///         size is not positive, an FFmpeg allocation fails, or pixel
///         conversion cannot be set up or fails.
void FrameExtractor::forEachFrame(FrameCallback cb)
{
    // Check the contexts too, not just the index: both are dereferenced below.
    if (d->videoStreamIndex < 0 || !d->fmt.ctx || !d->codec.ctx) {
        throw std::runtime_error("FrameExtractor::open() must succeed before forEachFrame()");
    }

    const int W = d->width;
    const int H = d->height;
    if (W <= 0 || H <= 0) {
        throw std::runtime_error("FrameExtractor: invalid frame dimensions");
    }

    SwsContextGuard sws;  // created lazily from the first decoded frame
    FrameGuard rawFrame;
    PacketGuard pkt;
    if (!rawFrame.frame) throw std::runtime_error("av_frame_alloc failed");
    if (!pkt.pkt) throw std::runtime_error("av_packet_alloc failed");

    const int rgbStride = W * 3;
    const size_t frameBytes = static_cast<size_t>(W) * H * 3;

    FrameData fd;
    fd.width = W;
    fd.height = H;
    fd.pixels.resize(frameBytes);

    // Converts one decoded frame straight into fd.pixels, releases the frame
    // and forwards the result to the callback. Returns the callback's verdict.
    const auto deliver = [&](AVFrame* frame) -> bool {
        // Reuses the existing scaler when the source parameters are unchanged,
        // otherwise replaces it. The old context is consumed either way, so
        // the result must always be stored back into the guard.
        sws.ctx = sws_getCachedContext(
            sws.ctx,
            frame->width, frame->height, static_cast<AVPixelFormat>(frame->format),
            W, H, AV_PIX_FMT_RGB24,
            SWS_BILINEAR, nullptr, nullptr, nullptr);
        if (!sws.ctx) {
            throw std::runtime_error("sws_getContext failed");
        }

        // No-op in the normal case; restores the buffer if the callback
        // resized or moved it during the previous invocation.
        fd.pixels.resize(frameBytes);

        uint8_t* const dstData[4] = { fd.pixels.data(), nullptr, nullptr, nullptr };
        const int dstStride[4] = { rgbStride, 0, 0, 0 };

        if (sws_scale(sws.ctx,
                      frame->data, frame->linesize, 0, frame->height,
                      dstData, dstStride) < 0) {
            throw std::runtime_error("sws_scale failed");
        }

        fd.pts = frame->pts;
        av_frame_unref(frame);

        return cb(fd);
    };

    bool keepGoing = true;

    while (keepGoing && av_read_frame(d->fmt.ctx, pkt.pkt) >= 0) {
        const bool isVideo = (pkt.pkt->stream_index == d->videoStreamIndex);
        const int sent = isVideo ? avcodec_send_packet(d->codec.ctx, pkt.pkt) : 0;
        av_packet_unref(pkt.pkt);

        if (!isVideo || sent < 0) {
            continue;  // foreign stream, or a packet the decoder rejected
        }

        // One packet may yield zero or more frames.
        int ret = 0;
        while (keepGoing && (ret = avcodec_receive_frame(d->codec.ctx, rawFrame.frame)) >= 0) {
            keepGoing = deliver(rawFrame.frame);
        }

        if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) {
            break;  // real decode error
        }
    }

    // Flush decoder: a null packet switches it to draining mode.
    if (keepGoing) {
        avcodec_send_packet(d->codec.ctx, nullptr);
        while (keepGoing && avcodec_receive_frame(d->codec.ctx, rawFrame.frame) >= 0) {
            keepGoing = deliver(rawFrame.frame);
        }
    }
}
|
|
|
|
/// Frame width cached by open(); 0 before open() has succeeded.
int FrameExtractor::width() const
{
    return d->width;
}
|
|
/// Frame height cached by open(); 0 before open() has succeeded.
int FrameExtractor::height() const
{
    return d->height;
}
|
|
/// Container duration in seconds as cached by open(); 0.0 when open() has not
/// succeeded or the container reported no duration.
double FrameExtractor::durationSeconds() const
{
    return d->duration;
}
|
|
/// Frame rate in frames per second as cached by open(), derived from the
/// video stream's average frame rate; 0.0 before open() has succeeded.
double FrameExtractor::framerate() const
{
    return d->framerate;
}
|