Skip to content

[GSOC 2025] Imgcodecs Metadata Support #27488

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 25 commits into
base: 4.x
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
271 changes: 270 additions & 1 deletion modules/imgcodecs/include/opencv2/imgcodecs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#define OPENCV_IMGCODECS_HPP

#include "opencv2/core.hpp"
#include <map>

/**
@defgroup imgcodecs Image file reading and writing
Expand Down Expand Up @@ -252,6 +253,10 @@ enum ImwriteGIFCompressionFlags {
IMWRITE_GIF_COLORTABLE_SIZE_256 = 8
};

//! @} imgcodecs_flags

//! @addtogroup imgcodecs_metadata
//! @{
enum ImageMetadataType
{
IMAGE_METADATA_UNKNOWN = -1, // Used when metadata type is unrecognized or not set
Expand All @@ -263,7 +268,271 @@ enum ImageMetadataType
IMAGE_METADATA_MAX = 2 // Highest valid index (usually used for bounds checking)
};

//! @} imgcodecs_flags
/**
 * @brief Value types that an Exif/TIFF tag can carry (see ExifEntry::type).
 *
 * Values 14 and 15 are intentionally not defined in this enumeration;
 * 16..18 are the BigTIFF 64-bit types.
 */
enum ExifTagType
{
    TAG_TYPE_NOTYPE    = 0,   //!< invalid or undefined type
    TAG_TYPE_BYTE      = 1,   //!< unsigned 8-bit integer
    TAG_TYPE_ASCII     = 2,   //!< null-terminated string of 8-bit ASCII characters
    TAG_TYPE_SHORT     = 3,   //!< unsigned 16-bit integer
    TAG_TYPE_LONG      = 4,   //!< unsigned 32-bit integer
    TAG_TYPE_RATIONAL  = 5,   //!< unsigned fraction: two LONGs (numerator, denominator), 64 bits in total
    TAG_TYPE_SBYTE     = 6,   //!< signed 8-bit integer
    TAG_TYPE_UNDEFINED = 7,   //!< untyped 8-bit data
    TAG_TYPE_SSHORT    = 8,   //!< signed 16-bit integer
    TAG_TYPE_SLONG     = 9,   //!< signed 32-bit integer
    TAG_TYPE_SRATIONAL = 10,  //!< signed fraction: two SLONGs, 64 bits in total
    TAG_TYPE_FLOAT     = 11,  //!< IEEE 32-bit floating point
    TAG_TYPE_DOUBLE    = 12,  //!< IEEE 64-bit floating point
    TAG_TYPE_IFD       = 13,  //!< 32-bit offset to an IFD
    TAG_TYPE_LONG8     = 16,  //!< BigTIFF: unsigned 64-bit integer
    TAG_TYPE_SLONG8    = 17,  //!< BigTIFF: signed 64-bit integer
    TAG_TYPE_IFD8      = 18   //!< BigTIFF: 64-bit offset to an IFD
};

/**
 * @brief Orientation of a picture as it may be stored in EXIF metadata.
 *
 * The orientation is typically relevant for pictures taken with a smartphone
 * or another camera equipped with an orientation sensor.
 * The values correspond to the EXIF 2.3 Specification.
 */
enum ImageOrientation
{
    IMAGE_ORIENTATION_TL = 1,  //!< Horizontal (normal)
    IMAGE_ORIENTATION_TR = 2,  //!< Mirrored horizontal
    IMAGE_ORIENTATION_BR = 3,  //!< Rotate 180
    IMAGE_ORIENTATION_BL = 4,  //!< Mirrored vertical
    IMAGE_ORIENTATION_LT = 5,  //!< Mirrored horizontal & rotate 270 CW
    IMAGE_ORIENTATION_RT = 6,  //!< Rotate 90 CW
    IMAGE_ORIENTATION_RB = 7,  //!< Mirrored horizontal & rotate 90 CW
    IMAGE_ORIENTATION_LB = 8   //!< Rotate 270 CW
};

/**
 * @brief Numeric identifiers of the TIFF, Exif and DNG tags known to the Exif API.
 *
 * Enumerators are listed in ascending numeric order. TAG_EMPTY marks an unset
 * entry (see ExifEntry::empty()) and TAG_INVALID_TAG is the 16-bit maximum.
 */
enum ExifTagId
{
    TAG_EMPTY                    = 0,

    // TIFF image structure and description tags
    TAG_SUB_FILETYPE             = 254,
    TAG_IMAGE_WIDTH              = 256,
    TAG_IMAGE_LENGTH             = 257,
    TAG_BITS_PER_SAMPLE          = 258,
    TAG_COMPRESSION              = 259,
    TAG_PHOTOMETRIC              = 262,
    TAG_THRESHOLDING             = 263,
    TAG_CELLWIDTH                = 264,
    TAG_CELLLENGTH               = 265,
    TAG_FILLORDER                = 266,
    TAG_DOCUMENTNAME             = 269,
    TAG_IMAGEDESCRIPTION         = 270,
    TAG_MAKE                     = 271,
    TAG_MODEL                    = 272,
    TAG_STRIP_OFFSET             = 273,
    TAG_ORIENTATION              = 274,
    TAG_SAMPLES_PER_PIXEL        = 277,
    TAG_ROWS_PER_STRIP           = 278,
    TAG_STRIP_BYTE_COUNTS        = 279,
    TAG_XRESOLUTION              = 282,
    TAG_YRESOLUTION              = 283,
    TAG_PLANAR_CONFIG            = 284,
    TAG_PAGENAME                 = 285,
    TAG_XPOSITION                = 286,
    TAG_YPOSITION                = 287,
    TAG_GRAYRESPONSEUNIT         = 290,
    TAG_GRAYRESPONSECURVE        = 291,
    TAG_T4OPTIONS                = 292,
    TAG_T6OPTIONS                = 293,
    TAG_RESOLUTION_UNIT          = 296,
    TAG_PAGENUMBER               = 297,
    TAG_TRANSFERFUNCTION         = 301,
    TAG_SOFTWARE                 = 305,
    TAG_MODIFYDATE               = 306,
    TAG_ARTIST                   = 315,
    TAG_HOST_COMPUTER            = 316,
    TAG_SAMPLEFORMAT             = 339,
    TAG_JPGFROMRAWSTART          = 513,
    TAG_JPGFROMRAWLENGTH         = 514,
    TAG_YCBCRSUBSAMPLING         = 530,
    TAG_YCBCRPOSITIONING         = 531,
    TAG_REFERENCEBLACKWHITE      = 532,

    // TIFF/EP colour filter array tags (also used by DNG)
    TAG_CFA_REPEAT_PATTERN_DIM   = 33421,
    TAG_CFA_PATTERN              = 33422,

    TAG_COPYRIGHT                = 33432,
    TAG_EXPOSURE_TIME            = 33434,
    TAG_FNUMBER                  = 33437,

    TAG_EXIF_OFFSET              = 34665,
    TAG_EXPOSUREPROGRAM          = 34850,
    TAG_GPSINFO                  = 34853,
    TAG_ISOSPEED                 = 34855,

    TAG_EXIF_VERSION             = 36864,
    TAG_DATETIME_ORIGINAL        = 36867,
    TAG_DATETIME_CREATE          = 36868,

    TAG_OFFSETTIME               = 36880,
    TAG_OFFSETTIME_ORIGINAL      = 36881,
    TAG_OFFSETTIME_DIGITIZED     = 36882,

    TAG_COMPONENTSCONFIGURATION  = 37121,

    TAG_SHUTTER_SPEED            = 37377,
    TAG_APERTURE_VALUE           = 37378,
    TAG_BRIGHTNESS_VALUE         = 37379,
    TAG_EXPOSUREBIASVALUE        = 37380,
    TAG_MAXAPERTUREVALUE         = 37381,
    TAG_SUBJECTDISTANCE          = 37382,
    TAG_METERINGMODE             = 37383,
    TAG_LIGHTSOURCE              = 37384,
    TAG_FLASH                    = 37385,
    TAG_FOCALLENGTH              = 37386,

    TAG_SUBJECT_AREA             = 37396,

    TAG_EP_STANDARD_ID           = 37398,

    TAG_MAKERNOTE                = 37500,
    TAG_USERCOMMENT              = 37510,

    TAG_SUBSECTIME               = 37520,
    TAG_SUBSECTIME_ORIGINAL      = 37521,
    TAG_SUBSECTIME_DIGITIZED     = 37522,

    TAG_FLASHPIXVERSION          = 40960,
    TAG_COLORSPACE               = 40961,
    TAG_EXIF_IMAGE_WIDTH         = 40962,
    TAG_EXIF_IMAGE_HEIGHT        = 40963,

    TAG_FOCALPLANEXRESOLUTION    = 41486,
    TAG_FOCALPLANEYRESOLUTION    = 41487,
    TAG_FOCALPLANERESOLUTIONUNIT = 41488,

    TAG_SCENE_TYPE               = 41729,
    TAG_CUSTOMRENDERED           = 41985,
    TAG_EXPOSUREMODE             = 41986,
    TAG_WHITE_BALANCE            = 41987,
    TAG_SCENECAPTURETYPE         = 41990,

    TAG_BODYSERIALNUMBER         = 42033,
    TAG_LENSSPECIFICATION        = 42034,
    TAG_LENSMAKE                 = 42035,
    TAG_LENSMODEL                = 42036,

    // DNG tags
    TAG_DNG_VERSION              = 50706,
    TAG_DNG_BACKWARD_VERSION     = 50707,
    TAG_UNIQUE_CAMERA_MODEL      = 50708,
    TAG_CFA_PLANECOLOR           = 50710,
    TAG_CFA_LAYOUT               = 50711,
    TAG_BLACK_LEVEL_REPEAT_DIM   = 50713,
    TAG_BLACK_LEVEL              = 50714,
    TAG_WHITE_LEVEL              = 50717,
    TAG_DEFAULT_SCALE            = 50718,
    TAG_DEFAULT_CROP_ORIGIN      = 50719,
    TAG_DEFAULT_CROP_SIZE        = 50720,
    TAG_COLOR_MATRIX1            = 50721,
    TAG_COLOR_MATRIX2            = 50722,
    TAG_CAMERA_CALIBRATION1      = 50723,
    TAG_CAMERA_CALIBRATION2      = 50724,
    TAG_ANALOG_BALANCE           = 50727,
    TAG_AS_SHOT_NEUTRAL          = 50728,
    TAG_AS_SHOT_WHITE_XY         = 50729,
    TAG_BASELINE_EXPOSURE        = 50730,
    // ChromaBlurRadius is tag 50737 (0xC631) in the DNG specification;
    // the previously used value 50703 does not correspond to this tag.
    TAG_CHROMA_BLUR_RADIUS       = 50737,
    TAG_CALIBRATION_ILLUMINANT1  = 50778,
    TAG_CALIBRATION_ILLUMINANT2  = 50779,
    TAG_ACTIVE_AREA              = 50829,
    TAG_EXTRA_CAMERA_PROFILES    = 50933,
    TAG_AS_SHOT_PROFILE_NAME     = 50934,
    TAG_PROFILE_NAME             = 50936,
    TAG_FORWARD_MATRIX1          = 50964,
    TAG_FORWARD_MATRIX2          = 50965,
    TAG_PREVIEW_COLORSPACE       = 50970,
    TAG_OPCODE_LIST2             = 51009,
    TAG_NOISE_PROFILE            = 51041,
    TAG_DEFAULT_BLACK_RENDER     = 51110,

    TAG_INVALID_TAG              = 65535
};

/** @brief Rational number stored as a numerator/denominator pair; defaults to 0/1.

NOTE(review): the name suggests an unsigned rational, but both fields are declared
as signed `int` - confirm that values above INT_MAX are not expected here.
*/
struct CV_EXPORTS_W_SIMPLE urational64_t
{
// Numerator, 0 by default.
CV_PROP_RW int num = 0;
// Denominator, 1 by default so that a default-constructed value is 0/1.
CV_PROP_RW int denom = 1;
};

/** @brief Signed rational number stored as a numerator/denominator pair of `int` fields; defaults to 0/1.

This is the element type returned by ExifEntry::getValueAsRational().
*/
struct CV_EXPORTS_W_SIMPLE srational64_t
{
// Numerator, 0 by default.
CV_PROP_RW int num = 0;
// Denominator, 1 by default so that a default-constructed value is 0/1.
CV_PROP_RW int denom = 1;
};

/** @brief A single Exif tag: its identifier, value type, element count and value.

The value is kept in one of four private slots (integer, string, raw bytes, rationals)
that are accessed through the getValueAs...() / setValueAs...() members. The struct does not
check that the slot being read or written matches `type`.
*/
struct CV_EXPORTS_W_SIMPLE ExifEntry
{
public:
// Creates an empty entry: tagId is TAG_EMPTY, type is TAG_TYPE_NOTYPE, count is 1,
// the integer value is 0 and the string/raw/rational values are empty.
ExifEntry()
: tagId(TAG_EMPTY), type(TAG_TYPE_NOTYPE), count(1),
value_u32(0), value_str(), value_raw(), value_srational() {}
~ExifEntry() = default;

// Tag identifier; presumably one of the ExifTagId values (stored as plain int).
CV_PROP_RW int tagId;
// Value type; presumably one of the ExifTagType values (stored as plain int).
CV_PROP_RW int type;
// Number of values; 1 by default. Only setValueAsString() updates it automatically.
CV_PROP_RW int count;

// Getters return copies of the corresponding value slot.
// NOTE(review): the integer slot is named value_u32 but is stored and returned as signed int.
CV_WRAP int getValueAsInt() const { return value_u32; }
CV_WRAP std::string getValueAsString() const { return value_str; }
CV_WRAP std::vector<uchar> getValueAsRaw() const { return value_raw; }
CV_WRAP std::vector<srational64_t> getValueAsRational() const { return value_srational; }

// Stores the string and sets count to the string length (no terminating NUL is counted).
CV_WRAP void setValueAsString(const std::string& value) {
value_str = value;
count = static_cast<int>(value.size());
}

// These setters store the value only and leave count unchanged.
// NOTE(review): unlike setValueAsString(), the caller has to keep count in sync - confirm this is intended.
CV_WRAP void setValueAsInt(int value) { value_u32 = value; }
CV_WRAP void setValueAsRaw(const std::vector<uchar>& value) { value_raw = value; }
CV_WRAP void setValueAsRational(const std::vector<srational64_t>& value) { value_srational = value; }

// Returns true when tagId is TAG_EMPTY, i.e. no tag has been assigned to this entry.
CV_WRAP bool empty() const { return tagId == TAG_EMPTY; }
// The following members are only declared here; their definitions are not part of this header section.
CV_WRAP std::string getTagIdAsString() const;
CV_WRAP std::string getTagTypeAsString() const;
// Takes an output stream and returns a stream reference; not exposed to the bindings (no CV_WRAP).
std::ostream& dump(std::ostream& strm) const;

private:
// Value slots; which one is meaningful is presumably determined by `type`.
int value_u32;
std::string value_str;
std::vector<uchar> value_raw;
std::vector<srational64_t> value_srational;
};

/** @brief Decodes EXIF metadata from binary data into structured ExifEntry entries.

This function parses raw EXIF binary data and extracts metadata tags as structured `ExifEntry` objects.
The extracted entries are organized as a vector of IFD (Image File Directory) blocks, where each IFD is a vector of `ExifEntry`.

@param data The input binary EXIF data buffer.
@param exif_entries Output vector of IFD blocks. Each IFD block is a vector of `ExifEntry` objects containing decoded tag information (tag ID, type, count, and value).
@return Returns `true` if decoding was successful, `false` otherwise.
*/
CV_EXPORTS_W bool decodeExif(const std::vector<uchar>& data, CV_OUT std::vector< std::vector<ExifEntry> >& exif_entries);

/** @brief Encodes structured ExifEntry metadata into binary EXIF data.

This function serializes a collection of ExifEntry objects into a binary EXIF data block.
The input entries are expected to be organized as a vector of IFD blocks, matching the EXIF file structure (e.g., primary IFD, Exif IFD, GPS IFD).

@param exif_entries Input vector of IFD blocks. Each IFD block is a vector of ExifEntry objects containing tag metadata to be encoded.
@param data Output buffer where the encoded EXIF binary data will be stored.
@return Returns `true` if encoding was successful, `false` otherwise.
*/
CV_EXPORTS_W bool encodeExif(const std::vector<std::vector<ExifEntry>>& exif_entries, CV_OUT std::vector<uchar>& data);

//! @} imgcodecs_metadata

/** @brief Represents an animation with multiple frames.
The `Animation` struct is designed to store and manage data for animated sequences such as those from animated formats (e.g., GIF, AVIF, APNG, WebP).
Expand Down
6 changes: 6 additions & 0 deletions modules/imgcodecs/misc/python/pyopencv_imgcodecs.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#ifdef HAVE_OPENCV_IMGCODECS

// Container aliases for the Exif types, declared for the Python binding layer.
using vector_srational64_t = std::vector<srational64_t>;
using vector_vector_ExifEntry = std::vector<std::vector<ExifEntry> >;

#endif
Loading
Loading