Skip to content

[3.8] bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-3062) #14375

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Doc/library/mimetypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ behavior of the module.
Specifying an empty list for *files* will prevent the system defaults from
being applied: only the well-known values will be present from a built-in list.

If *files* is ``None``, the internal data structure is completely rebuilt to its
initial default value. This is a stable operation and will produce the same results
when called multiple times.

.. versionchanged:: 3.2
Previously, Windows registry settings were ignored.

Expand Down
251 changes: 131 additions & 120 deletions Lib/mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ class MimeTypes:
def __init__(self, filenames=(), strict=True):
if not inited:
init()
self.encodings_map = encodings_map.copy()
self.suffix_map = suffix_map.copy()
self.encodings_map = _encodings_map_default.copy()
self.suffix_map = _suffix_map_default.copy()
self.types_map = ({}, {}) # dict for (non-strict, strict)
self.types_map_inv = ({}, {})
for (ext, type) in types_map.items():
for (ext, type) in _types_map_default.items():
self.add_type(type, ext, True)
for (ext, type) in common_types.items():
for (ext, type) in _common_types_default.items():
self.add_type(type, ext, False)
for name in filenames:
self.read(name, strict)
Expand Down Expand Up @@ -346,11 +346,19 @@ def init(files=None):
global suffix_map, types_map, encodings_map, common_types
global inited, _db
inited = True # so that MimeTypes.__init__() doesn't call us again
db = MimeTypes()
if files is None:

if files is None or _db is None:
db = MimeTypes()
if _winreg:
db.read_windows_registry()
files = knownfiles

if files is None:
files = knownfiles
else:
files = knownfiles + list(files)
else:
db = _db

for file in files:
if os.path.isfile(file):
db.read(file)
Expand All @@ -374,12 +382,12 @@ def read_mime_types(file):


def _default_mime_types():
global suffix_map
global encodings_map
global types_map
global common_types
global suffix_map, _suffix_map_default
global encodings_map, _encodings_map_default
global types_map, _types_map_default
global common_types, _common_types_default

suffix_map = {
suffix_map = _suffix_map_default = {
'.svgz': '.svg.gz',
'.tgz': '.tar.gz',
'.taz': '.tar.gz',
Expand All @@ -388,7 +396,7 @@ def _default_mime_types():
'.txz': '.tar.xz',
}

encodings_map = {
encodings_map = _encodings_map_default = {
'.gz': 'gzip',
'.Z': 'compress',
'.bz2': 'bzip2',
Expand All @@ -399,152 +407,155 @@ def _default_mime_types():
# at http://www.iana.org/assignments/media-types
# or extensions, i.e. using the x- prefix

# If you add to these, please keep them sorted!
types_map = {
# If you add to these, please keep them sorted by mime type.
# Make sure the entry with the preferred file extension for a particular mime type
# appears before any others of the same mimetype.
types_map = _types_map_default = {
'.js' : 'application/javascript',
'.mjs' : 'application/javascript',
'.json' : 'application/json',
'.doc' : 'application/msword',
'.dot' : 'application/msword',
'.wiz' : 'application/msword',
'.bin' : 'application/octet-stream',
'.a' : 'application/octet-stream',
'.dll' : 'application/octet-stream',
'.exe' : 'application/octet-stream',
'.o' : 'application/octet-stream',
'.obj' : 'application/octet-stream',
'.so' : 'application/octet-stream',
'.oda' : 'application/oda',
'.pdf' : 'application/pdf',
'.p7c' : 'application/pkcs7-mime',
'.ps' : 'application/postscript',
'.ai' : 'application/postscript',
'.aif' : 'audio/x-aiff',
'.aifc' : 'audio/x-aiff',
'.aiff' : 'audio/x-aiff',
'.au' : 'audio/basic',
'.avi' : 'video/x-msvideo',
'.bat' : 'text/plain',
'.eps' : 'application/postscript',
'.m3u' : 'application/vnd.apple.mpegurl',
'.m3u8' : 'application/vnd.apple.mpegurl',
'.xls' : 'application/vnd.ms-excel',
'.xlb' : 'application/vnd.ms-excel',
'.ppt' : 'application/vnd.ms-powerpoint',
'.pot' : 'application/vnd.ms-powerpoint',
'.ppa' : 'application/vnd.ms-powerpoint',
'.pps' : 'application/vnd.ms-powerpoint',
'.pwz' : 'application/vnd.ms-powerpoint',
'.wasm' : 'application/wasm',
'.bcpio' : 'application/x-bcpio',
'.bin' : 'application/octet-stream',
'.bmp' : 'image/bmp',
'.c' : 'text/plain',
'.cdf' : 'application/x-netcdf',
'.cpio' : 'application/x-cpio',
'.csh' : 'application/x-csh',
'.css' : 'text/css',
'.csv' : 'text/csv',
'.dll' : 'application/octet-stream',
'.doc' : 'application/msword',
'.dot' : 'application/msword',
'.dvi' : 'application/x-dvi',
'.eml' : 'message/rfc822',
'.eps' : 'application/postscript',
'.etx' : 'text/x-setext',
'.exe' : 'application/octet-stream',
'.gif' : 'image/gif',
'.gtar' : 'application/x-gtar',
'.h' : 'text/plain',
'.hdf' : 'application/x-hdf',
'.htm' : 'text/html',
'.html' : 'text/html',
'.ico' : 'image/vnd.microsoft.icon',
'.ief' : 'image/ief',
'.jpe' : 'image/jpeg',
'.jpeg' : 'image/jpeg',
'.jpg' : 'image/jpeg',
'.js' : 'application/javascript',
'.json' : 'application/json',
'.ksh' : 'text/plain',
'.latex' : 'application/x-latex',
'.m1v' : 'video/mpeg',
'.m3u' : 'application/vnd.apple.mpegurl',
'.m3u8' : 'application/vnd.apple.mpegurl',
'.man' : 'application/x-troff-man',
'.me' : 'application/x-troff-me',
'.mht' : 'message/rfc822',
'.mhtml' : 'message/rfc822',
'.mif' : 'application/x-mif',
'.mjs' : 'application/javascript',
'.mov' : 'video/quicktime',
'.movie' : 'video/x-sgi-movie',
'.mp2' : 'audio/mpeg',
'.mp3' : 'audio/mpeg',
'.mp4' : 'video/mp4',
'.mpa' : 'video/mpeg',
'.mpe' : 'video/mpeg',
'.mpeg' : 'video/mpeg',
'.mpg' : 'video/mpeg',
'.ms' : 'application/x-troff-ms',
'.cdf' : 'application/x-netcdf',
'.nc' : 'application/x-netcdf',
'.nws' : 'message/rfc822',
'.o' : 'application/octet-stream',
'.obj' : 'application/octet-stream',
'.oda' : 'application/oda',
'.p12' : 'application/x-pkcs12',
'.p7c' : 'application/pkcs7-mime',
'.pbm' : 'image/x-portable-bitmap',
'.pdf' : 'application/pdf',
'.pfx' : 'application/x-pkcs12',
'.pgm' : 'image/x-portable-graymap',
'.pl' : 'text/plain',
'.png' : 'image/png',
'.pnm' : 'image/x-portable-anymap',
'.pot' : 'application/vnd.ms-powerpoint',
'.ppa' : 'application/vnd.ms-powerpoint',
'.ppm' : 'image/x-portable-pixmap',
'.pps' : 'application/vnd.ms-powerpoint',
'.ppt' : 'application/vnd.ms-powerpoint',
'.ps' : 'application/postscript',
'.pwz' : 'application/vnd.ms-powerpoint',
'.py' : 'text/x-python',
'.ram' : 'application/x-pn-realaudio',
'.pyc' : 'application/x-python-code',
'.pyo' : 'application/x-python-code',
'.qt' : 'video/quicktime',
'.ra' : 'audio/x-pn-realaudio',
'.ram' : 'application/x-pn-realaudio',
'.ras' : 'image/x-cmu-raster',
'.rdf' : 'application/xml',
'.rgb' : 'image/x-rgb',
'.roff' : 'application/x-troff',
'.rtx' : 'text/richtext',
'.sgm' : 'text/x-sgml',
'.sgml' : 'text/x-sgml',
'.sh' : 'application/x-sh',
'.shar' : 'application/x-shar',
'.snd' : 'audio/basic',
'.so' : 'application/octet-stream',
'.src' : 'application/x-wais-source',
'.swf' : 'application/x-shockwave-flash',
'.sv4cpio': 'application/x-sv4cpio',
'.sv4crc' : 'application/x-sv4crc',
'.svg' : 'image/svg+xml',
'.swf' : 'application/x-shockwave-flash',
'.t' : 'application/x-troff',
'.tar' : 'application/x-tar',
'.tcl' : 'application/x-tcl',
'.tex' : 'application/x-tex',
'.texi' : 'application/x-texinfo',
'.texinfo': 'application/x-texinfo',
'.tif' : 'image/tiff',
'.tiff' : 'image/tiff',
'.roff' : 'application/x-troff',
'.t' : 'application/x-troff',
'.tr' : 'application/x-troff',
'.tsv' : 'text/tab-separated-values',
'.txt' : 'text/plain',
'.man' : 'application/x-troff-man',
'.me' : 'application/x-troff-me',
'.ms' : 'application/x-troff-ms',
'.ustar' : 'application/x-ustar',
'.vcf' : 'text/x-vcard',
'.wasm' : 'application/wasm',
'.wav' : 'audio/x-wav',
'.webm' : 'video/webm',
'.wiz' : 'application/msword',
'.src' : 'application/x-wais-source',
'.xsl' : 'application/xml',
'.rdf' : 'application/xml',
'.wsdl' : 'application/xml',
'.xbm' : 'image/x-xbitmap',
'.xlb' : 'application/vnd.ms-excel',
'.xls' : 'application/vnd.ms-excel',
'.xml' : 'text/xml',
'.xpdl' : 'application/xml',
'.zip' : 'application/zip',
'.au' : 'audio/basic',
'.snd' : 'audio/basic',
'.mp3' : 'audio/mpeg',
'.mp2' : 'audio/mpeg',
'.aif' : 'audio/x-aiff',
'.aifc' : 'audio/x-aiff',
'.aiff' : 'audio/x-aiff',
'.ra' : 'audio/x-pn-realaudio',
'.wav' : 'audio/x-wav',
'.bmp' : 'image/bmp',
'.gif' : 'image/gif',
'.ief' : 'image/ief',
'.jpg' : 'image/jpeg',
'.jpe' : 'image/jpeg',
'.jpeg' : 'image/jpeg',
'.png' : 'image/png',
'.svg' : 'image/svg+xml',
'.tiff' : 'image/tiff',
'.tif' : 'image/tiff',
'.ico' : 'image/vnd.microsoft.icon',
'.ras' : 'image/x-cmu-raster',
'.bmp' : 'image/x-ms-bmp',
'.pnm' : 'image/x-portable-anymap',
'.pbm' : 'image/x-portable-bitmap',
'.pgm' : 'image/x-portable-graymap',
'.ppm' : 'image/x-portable-pixmap',
'.rgb' : 'image/x-rgb',
'.xbm' : 'image/x-xbitmap',
'.xpm' : 'image/x-xpixmap',
'.xsl' : 'application/xml',
'.xwd' : 'image/x-xwindowdump',
'.zip' : 'application/zip',
'.eml' : 'message/rfc822',
'.mht' : 'message/rfc822',
'.mhtml' : 'message/rfc822',
'.nws' : 'message/rfc822',
'.css' : 'text/css',
'.csv' : 'text/csv',
'.html' : 'text/html',
'.htm' : 'text/html',
'.txt' : 'text/plain',
'.bat' : 'text/plain',
'.c' : 'text/plain',
'.h' : 'text/plain',
'.ksh' : 'text/plain',
'.pl' : 'text/plain',
'.rtx' : 'text/richtext',
'.tsv' : 'text/tab-separated-values',
'.py' : 'text/x-python',
'.etx' : 'text/x-setext',
'.sgm' : 'text/x-sgml',
'.sgml' : 'text/x-sgml',
'.vcf' : 'text/x-vcard',
'.xml' : 'text/xml',
'.mp4' : 'video/mp4',
'.mpeg' : 'video/mpeg',
'.m1v' : 'video/mpeg',
'.mpa' : 'video/mpeg',
'.mpe' : 'video/mpeg',
'.mpg' : 'video/mpeg',
'.mov' : 'video/quicktime',
'.qt' : 'video/quicktime',
'.webm' : 'video/webm',
'.avi' : 'video/x-msvideo',
'.movie' : 'video/x-sgi-movie',
}

# These are non-standard types, commonly found in the wild. They will
# only match if the strict=0 flag is given to the API methods.

# Please sort these too
common_types = {
'.jpg' : 'image/jpg',
'.mid' : 'audio/midi',
common_types = _common_types_default = {
'.rtf' : 'application/rtf',
'.midi': 'audio/midi',
'.mid' : 'audio/midi',
'.jpg' : 'image/jpg',
'.pict': 'image/pict',
'.pct' : 'image/pict',
'.pic' : 'image/pict',
'.pict': 'image/pict',
'.rtf' : 'application/rtf',
'.xul' : 'text/xul'
'.xul' : 'text/xul',
}


Expand Down
51 changes: 51 additions & 0 deletions Lib/test/test_mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,57 @@ def test_encoding(self):
strict=True)
self.assertEqual(exts, ['.g3', '.g\xb3'])

def test_init_reinitializes(self):
# Verify that mimetypes.init() discards a type that was registered at
# runtime through mimetypes.add_type().
# bpo-4963: make sure an init starts clean
# First, put some poison into the types table
mimetypes.add_type('foo/bar', '.foobar')
# Sanity check: the poisoned entry is visible before re-initialization.
self.assertEqual(mimetypes.guess_extension('foo/bar'), '.foobar')
# Reinitialize
mimetypes.init()
# Poison should be gone.
self.assertEqual(mimetypes.guess_extension('foo/bar'), None)

def test_preferred_extension(self):
# Check that guess_extension() returns one specific, expected extension for
# each of the MIME types below, and that the answers are unchanged after
# mimetypes.init() is called again.
def check_extensions():
# Each assertion pins the extension expected for one MIME type.
self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin')
self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps')
self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u')
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls')
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt')
self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi')
self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff')
self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl')
self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3')
self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg')
self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff')
self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml')
self.assertEqual(mimetypes.guess_extension('text/html'), '.html')
self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt')
self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg')
self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov')

# Run the checks against the current state, re-initialize, then run the
# same checks again.
check_extensions()
mimetypes.init()
check_extensions()

def test_init_stability(self):
# Verify that a second mimetypes.init() call replaces the four module-level
# maps with new objects whose contents equal those from the first call.
mimetypes.init()

# Keep references to the maps as they are after the first init().
suffix_map = mimetypes.suffix_map
encodings_map = mimetypes.encodings_map
types_map = mimetypes.types_map
common_types = mimetypes.common_types

mimetypes.init()
# The module attributes must now refer to different objects ...
self.assertIsNot(suffix_map, mimetypes.suffix_map)
self.assertIsNot(encodings_map, mimetypes.encodings_map)
self.assertIsNot(types_map, mimetypes.types_map)
self.assertIsNot(common_types, mimetypes.common_types)
# ... that compare equal to the ones captured earlier.
self.assertEqual(suffix_map, mimetypes.suffix_map)
self.assertEqual(encodings_map, mimetypes.encodings_map)
self.assertEqual(types_map, mimetypes.types_map)
self.assertEqual(common_types, mimetypes.common_types)

def test_path_like_ob(self):
filename = "LICENSE.txt"
filepath = pathlib.Path(filename)
Expand Down
Loading