Merge pull request #284 from cchampet/refactor_VideoPropertiesAnalyseFirstGOP

cchampet · web-flow · commit 3d0dc4a19c32 · 2016-11-04T09:31:05.000+01:00
VideoProperties: improve how the first GOP is analysed
diff --git a/src/AvTranscoder/file/InputFile.cpp b/src/AvTranscoder/file/InputFile.cpp
@@ -30,7 +30,7 @@ InputFile::InputFile(const std::string& filename)
     _formatContext.findStreamInfo();
 
     // Get the stream information as properties
-    _properties = new FileProperties(_formatContext);
+    _properties = new FileProperties(*this);
 
     // Create streams
     for(size_t streamIndex = 0; streamIndex < _formatContext.getNbStreams(); ++streamIndex)
diff --git a/src/AvTranscoder/file/InputFile.hpp b/src/AvTranscoder/file/InputFile.hpp
@@ -79,7 +79,7 @@ class AvExport InputFile
 
     std::string getFilename() const { return _filename; }
 
-    FormatContext& getFormatContext() { return _formatContext; }
+    const FormatContext& getFormatContext() const { return _formatContext; }
 
     /**
      * @brief Set the format of the input file
diff --git a/src/AvTranscoder/properties/FileProperties.cpp b/src/AvTranscoder/properties/FileProperties.cpp
@@ -11,9 +11,10 @@
 namespace avtranscoder
 {
 
-FileProperties::FileProperties(const FormatContext& formatContext)
-    : _formatContext(&formatContext)
-    , _avFormatContext(&formatContext.getAVFormatContext())
+FileProperties::FileProperties(const InputFile& file)
+    : _file(file)
+    , _formatContext(&file.getFormatContext())
+    , _avFormatContext(&file.getFormatContext().getAVFormatContext())
     , _videoStreams()
     , _audioStreams()
     , _dataStreams()
@@ -31,8 +32,8 @@ FileProperties::FileProperties(const FormatContext& formatContext)
 void FileProperties::extractStreamProperties(IProgress& progress, const EAnalyseLevel level)
 {
     // Returns at the beginning of the stream before any deep analysis
-    if(level > eAnalyseLevelHeader && !isRawFormat())
-        const_cast<FormatContext*>(_formatContext)->seek(0, AVSEEK_FLAG_BACKWARD);
+    if(level > eAnalyseLevelHeader && ! isRawFormat())
+        const_cast<InputFile&>(_file).seekAtFrame(0, AVSEEK_FLAG_BACKWARD);
 
     // clear properties
     clearStreamProperties();
@@ -123,8 +124,8 @@ void FileProperties::extractStreamProperties(IProgress& progress, const EAnalyse
     }
 
     // Returns at the beginning of the stream after any deep analysis
-    if(level > eAnalyseLevelHeader && !isRawFormat())
-        const_cast<FormatContext*>(_formatContext)->seek(0, AVSEEK_FLAG_BACKWARD);
+    if(level > eAnalyseLevelHeader && ! isRawFormat())
+        const_cast<InputFile&>(_file).seekAtFrame(0, AVSEEK_FLAG_BACKWARD);
 }
 
 std::string FileProperties::getFilename() const
diff --git a/src/AvTranscoder/properties/FileProperties.hpp b/src/AvTranscoder/properties/FileProperties.hpp
@@ -3,7 +3,7 @@
 
 #include <AvTranscoder/common.hpp>
 #include <AvTranscoder/properties/util.hpp>
-#include <AvTranscoder/file/FormatContext.hpp>
+#include <AvTranscoder/file/InputFile.hpp>
 #include <AvTranscoder/progress/IProgress.hpp>
 
 #include <AvTranscoder/properties/StreamProperties.hpp>
@@ -29,7 +29,7 @@ class AvExport FileProperties
      * @note The default streams analyse level is eAnalyseLevelHeader
      * @see FormatContext
      */
-    FileProperties(const FormatContext& formatContext);
+    FileProperties(const InputFile& file);
 
     /**
      * @brief Relaunch streams analysis with a specific level.
@@ -62,7 +62,7 @@ class AvExport FileProperties
     size_t getNbAttachementStreams() const { return _attachementStreams.size(); }
     size_t getNbUnknownStreams() const { return _unknownStreams.size(); }
 
-    const FormatContext& getFormatContext() const { return *_formatContext; }
+    const InputFile& getInputFile() const { return _file; }
 
     //@{
     // @brief Get the properties at the indicated stream index
@@ -110,6 +110,7 @@ class AvExport FileProperties
     void clearStreamProperties(); ///< Clear all array of stream properties
 
 private:
+    const InputFile& _file; ///< Has link (no ownership)
     const FormatContext* _formatContext;     ///< Has link (no ownership)
     const AVFormatContext* _avFormatContext; ///< Has link (no ownership)
 
diff --git a/src/AvTranscoder/properties/VideoProperties.cpp b/src/AvTranscoder/properties/VideoProperties.cpp
@@ -1,9 +1,11 @@
 #include "VideoProperties.hpp"
 
+#include <AvTranscoder/util.hpp>
+#include <AvTranscoder/decoder/VideoDecoder.hpp>
+#include <AvTranscoder/data/decoded/VideoFrame.hpp>
 #include <AvTranscoder/properties/util.hpp>
 #include <AvTranscoder/properties/FileProperties.hpp>
 #include <AvTranscoder/progress/NoDisplayProgress.hpp>
-#include <AvTranscoder/data/decoded/VideoFrame.hpp>
 
 extern "C" {
 #include <libavutil/avutil.h>
@@ -328,78 +330,20 @@ size_t VideoProperties::getBitRate() const
     if(_codecContext->bit_rate || _codecContext->rc_max_rate)
         return _codecContext->bit_rate;
 
-    LOG_WARN("The bitrate of the stream '" << _streamIndex << "' of file '" << _formatContext->filename << "' is unknown.")
-    LOG_INFO("Compute the video bitrate by decoding the first GOP.")
-
-    if(!_codecContext->width || !_codecContext->height)
-        throw std::runtime_error("cannot compute bit rate: invalid frame size");
-
-    if(!_formatContext || !_codec)
-        throw std::runtime_error("cannot compute bit rate: unknown format or codec");
-    if(!_codecContext->width || !_codecContext->height)
-        throw std::runtime_error("cannot compute bit rate: invalid frame size");
-
-    // discard no frame type when decode
-    _codecContext->skip_frame = AVDISCARD_NONE;
-
-    AVPacket pkt;
-    av_init_packet(&pkt);
-
-    avcodec_open2(_codecContext, _codec, NULL);
-
-    VideoFrame frame(VideoFrameDesc(getWidth(), getHeight(), getPixelProperties().getPixelFormatName()), false);
-    AVFrame& avFrame = frame.getAVFrame();
-
-    int gotFrame = 0;
-    size_t nbDecodedFrames = 0;
-    int gopFramesSize = 0;
-    int positionOfFirstKeyFrame = -1;
-    int positionOfLastKeyFrame = -1;
-
-    while(!av_read_frame(const_cast<AVFormatContext*>(_formatContext), &pkt))
+    if(_levelAnalysis == eAnalyseLevelHeader)
     {
-        if(pkt.stream_index == (int)_streamIndex)
-        {
-            avcodec_decode_video2(_codecContext, &avFrame, &gotFrame, &pkt);
-            if(gotFrame)
-            {
-                // check distance between key frames
-                if(avFrame.pict_type == AV_PICTURE_TYPE_I)
-                {
-                    if(positionOfFirstKeyFrame == -1)
-                        positionOfFirstKeyFrame = nbDecodedFrames;
-                    else
-                        positionOfLastKeyFrame = nbDecodedFrames;
-                }
-                ++nbDecodedFrames;
-
-                // added size of all frames of the same gop
-                if(positionOfLastKeyFrame == -1)
-                {
-#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(54, 7, 100)
-                    gopFramesSize += av_frame_get_pkt_size(&avFrame);
-#else
-                    gopFramesSize += pkt.size;
-#endif
-                }
-            }
-        }
-        av_free_packet(&pkt);
-        if(positionOfFirstKeyFrame != -1 && positionOfLastKeyFrame != -1)
-            break;
+        LOG_WARN("The bitrate of the stream '" << _streamIndex << "' of file '" << _formatContext->filename << "' is unknown. "
+                "Need a deeper analysis: see eAnalyseLevelFirstGop.")
+        return 0;
     }
-    // Close a given AVCodecContext and free all the data associated with it (but not the AVCodecContext itself)
-    avcodec_close(_codecContext);
-    // Returns at the beginning of the stream
-    const_cast<FormatContext*>(&_fileProperties->getFormatContext())->seek(0, AVSEEK_FLAG_BYTE);
 
-    const size_t gopSize = positionOfLastKeyFrame - positionOfFirstKeyFrame;
-    if(gopSize > 0)
+    LOG_INFO("Estimate the video bitrate from the first GOP.")
+    size_t gopFramesSize = 0;
+    for(size_t picture = 0; picture < _gopStructure.size(); ++picture)
     {
-        const float fps = av_q2d(_formatContext->streams[_streamIndex]->avg_frame_rate);
-        return (gopFramesSize / gopSize) * 8 * fps;
+        gopFramesSize += _gopStructure.at(picture).second;
     }
-    return 0;
+    return (gopFramesSize / getGopSize()) * 8 * getFps();
 }
 
 size_t VideoProperties::getMaxBitRate() const
@@ -556,69 +500,59 @@ std::vector<std::pair<char, int> > VideoProperties::getGopStructure() const
 
 void VideoProperties::analyseGopStructure(IProgress& progress)
 {
-    if(_formatContext && _codecContext && _codec)
+    if(! _formatContext || ! _codecContext || ! _codec)
+        return;
+    if(! _codecContext->width || ! _codecContext->height)
+        return;
+
+    InputFile& file = const_cast<InputFile&>(_fileProperties->getInputFile());
+    // Get the stream
+    IInputStream& stream = file.getStream(_streamIndex);
+    stream.activate();
+    // Create a decoder
+    VideoDecoder decoder(static_cast<InputStream&>(stream));
+
+    size_t count = 0;
+    int positionOfFirstKeyFrame = -1;
+    int positionOfLastKeyFrame = -1;
+    VideoFrame frame(VideoFrameDesc(getWidth(), getHeight(), getPixelFormatName(getPixelProperties().getAVPixelFormat())), false);
+    while(decoder.decodeNextFrame(frame))
     {
-        if(_codecContext->width && _codecContext->height)
+        AVFrame& avFrame = frame.getAVFrame();
+
+        _gopStructure.push_back(
+            std::make_pair(av_get_picture_type_char(avFrame.pict_type), av_frame_get_pkt_size(&avFrame)));
+        _isInterlaced = avFrame.interlaced_frame;
+        _isTopFieldFirst = avFrame.top_field_first;
+        if(avFrame.pict_type == AV_PICTURE_TYPE_I)
         {
-            // Discard no frame type when decode
-            _codecContext->skip_frame = AVDISCARD_NONE;
-
-            AVPacket pkt;
-            av_init_packet(&pkt);
-
-            // Initialize the AVCodecContext to use the given AVCodec
-            avcodec_open2(_codecContext, _codec, NULL);
-
-            VideoFrame frame(VideoFrameDesc(getWidth(), getHeight(), getPixelProperties().getPixelFormatName()), false);
-            AVFrame& avFrame = frame.getAVFrame();
-
-            size_t count = 0;
-            int gotFrame = 0;
-            int positionOfFirstKeyFrame = -1;
-            int positionOfLastKeyFrame = -1;
-
-            while(!av_read_frame(const_cast<AVFormatContext*>(_formatContext), &pkt))
-            {
-                if(pkt.stream_index == (int)_streamIndex)
-                {
-                    avcodec_decode_video2(_codecContext, &avFrame, &gotFrame, &pkt);
-                    if(gotFrame)
-                    {
-                        _gopStructure.push_back(
-                            std::make_pair(av_get_picture_type_char(avFrame.pict_type), av_frame_get_pkt_size(&avFrame)));
-                        _isInterlaced = avFrame.interlaced_frame;
-                        _isTopFieldFirst = avFrame.top_field_first;
-                        if(avFrame.pict_type == AV_PICTURE_TYPE_I)
-                        {
-                            if(positionOfFirstKeyFrame == -1)
-                                positionOfFirstKeyFrame = count;
-                            else
-                                positionOfLastKeyFrame = count;
-                        }
-
-                        _gopSize = ++count;
-                    }
-                }
-                av_free_packet(&pkt);
-
-                // If the first 2 key frames are found
-                if(positionOfFirstKeyFrame != -1 && positionOfLastKeyFrame != -1)
-                {
-                    // Set gop size as distance between these 2 key frames
-                    _gopSize = positionOfLastKeyFrame - positionOfFirstKeyFrame;
-                    // Update gop structure to keep only one gop
-                    while(_gopStructure.size() > _gopSize)
-                        _gopStructure.pop_back();
-                    break;
-                }
-            }
-
-            // Close a given AVCodecContext and free all the data associated with it (but not the AVCodecContext itself)
-            avcodec_close(_codecContext);
-
-            // Returns at the beginning of the stream
-            const_cast<FormatContext*>(&_fileProperties->getFormatContext())->seek(0, AVSEEK_FLAG_BYTE);
+            if(positionOfFirstKeyFrame == -1)
+                positionOfFirstKeyFrame = count;
+            else
+                positionOfLastKeyFrame = count;
         }
+
+        _gopSize = ++count;
+
+        // If the first 2 key frames are found
+        if(positionOfFirstKeyFrame != -1 && positionOfLastKeyFrame != -1)
+        {
+            // Set gop size as distance between these 2 key frames
+            _gopSize = positionOfLastKeyFrame - positionOfFirstKeyFrame;
+            // Update gop structure to keep only one gop
+            while(_gopStructure.size() > _gopSize)
+                _gopStructure.pop_back();
+            break;
+        }
+    }
+
+    // Seek back to the beginning of the stream
+    file.seekAtFrame(0, AVSEEK_FLAG_BYTE);
+
+    // Check GOP size
+    if(_gopSize <= 0)
+    {
+        throw std::runtime_error("Invalid GOP size when decoding the first data.");
     }
 }
 
diff --git a/src/AvTranscoder/properties/VideoProperties.hpp b/src/AvTranscoder/properties/VideoProperties.hpp
@@ -42,7 +42,9 @@ class AvExport VideoProperties : public StreamProperties
 
     /**
      * @return The video bitrate in bits/s.
-     * @warning If there is no such info available in the container, this data is estimated by decoding the first GOP.
+     * @note Returns 0 if the bitrate is unknown (e.g. the container gives no bitrate and only the header has been analysed).
+     * @warning If the container does not provide this information, the bitrate is estimated from the frames of the first GOP.
+     * @see eAnalyseLevelFirstGop
      */
     size_t getBitRate() const;
     size_t getMaxBitRate() const;
diff --git a/test/pyTest/testProperties.py b/test/pyTest/testProperties.py
@@ -114,25 +114,26 @@ def testCheckRawVideoProperties():
     inputFile = av.InputFile(inputFileName)
     properties = inputFile.getProperties()
 
+    # Check format
     assert_true(properties.isRawFormat())
     assert_equals(properties.getNbStreams(), 1)
     assert_equals(properties.getNbVideoStreams(), 1)
     assert_equals(properties.getDuration(), 0) # file duration is unknown
     assert_equals(properties.getBitRate(), 0) # file bitrate is unknown
+    assert_equals(properties.getFileSize(), 256293L)
 
-    expectedFileSize = 256293L
-    assert_equals(properties.getFileSize(), expectedFileSize)
-
-    expectedBitRate = 177200L
-    expectedNbFrames = 200
-    expectedDuration = 8
-    expectedFps = 25
-
+    # Check the video stream after analysing only the header
+    videoStream = properties.getVideoProperties()[0]
+    assert_equals(videoStream.getFps(), 25)
+    assert_equals(videoStream.getNbFrames(), 0) # stream nbFrames is unknown
+    assert_equals(videoStream.getDuration(), 0) # stream duration is unknown
+    assert_equals(videoStream.getBitRate(), 0) # stream bitrate is unknown
+    # Check the video stream after analysing the first GOP
+    inputFile.analyse(av.NoDisplayProgress(), av.eAnalyseLevelFirstGop)
     videoStream = properties.getVideoProperties()[0]
-    assert_equals(videoStream.getNbFrames(), expectedNbFrames)
-    assert_equals(videoStream.getDuration(), expectedDuration)
-    assert_equals(videoStream.getBitRate(), expectedBitRate)
-    assert_equals(videoStream.getFps(), expectedFps)
+    assert_equals(videoStream.getNbFrames(), 200)
+    assert_equals(videoStream.getDuration(), 8)
+    assert_equals(videoStream.getBitRate(), 177200L)
 
 
 def testCheckAudioProperties():
diff --git a/test/pyTest/testTranscoderRewrap.py b/test/pyTest/testTranscoderRewrap.py
@@ -164,6 +164,7 @@ def testRewrapRawVideoStream():
     # get src file of wrap
     inputFileName = os.environ['AVTRANSCODER_TEST_VIDEO_RAW_FILE']
     src_inputFile = av.InputFile(inputFileName)
+    src_inputFile.analyse(av.NoDisplayProgress(), av.eAnalyseLevelFirstGop)
     src_properties = src_inputFile.getProperties()
     src_videoStream = src_properties.getVideoProperties()[0]
 
@@ -180,6 +181,7 @@ def testRewrapRawVideoStream():
 
     # get dst file of wrap
     dst_inputFile = av.InputFile(outputFileName)
+    dst_inputFile.analyse(av.NoDisplayProgress(), av.eAnalyseLevelFirstGop)
     dst_properties = dst_inputFile.getProperties()
     dst_videoStream = dst_properties.getVideoProperties()[0]