|
| 1 | + |
| 2 | +Core Document Properties |
| 3 | +======================== |
| 4 | + |
| 5 | +The Open XML format provides for several sets of descriptive properties to be |
| 6 | +maintained with each document. One of these sets is the *core file properties*. |
| 7 | +The core properties are common to all Open XML formats and appear in |
| 8 | +document, presentation, and spreadsheet files. The 'Core' in core document |
| 9 | +properties refers to `Dublin Core`_, a metadata standard that defines a core |
| 10 | +set of elements to describe resources. |
| 11 | + |
| 12 | +The core properties are described in Part 2 of the ISO/IEC 29500 spec, in |
| 13 | +Section 11. The names of some core properties in |docx| are changed from |
| 14 | +those in the spec to conform to the MS API. |
| 15 | + |
| 16 | +Other properties such as company name are extended properties, held in |
| 17 | +``app.xml``. |
| 18 | + |
| 19 | + |
| 20 | +Candidate Protocol |
| 21 | +------------------ |
| 22 | + |
| 23 | +:: |
| 24 | + |
| 25 | + >>> document = Document() |
| 26 | + >>> core_properties = document.core_properties |
| 27 | + >>> core_properties.author |
| 28 | + 'python-docx' |
| 29 | + >>> core_properties.author = 'Brian' |
| 30 | + >>> core_properties.author |
| 31 | + 'Brian' |
| 32 | + |
| 33 | + |
| 34 | +Properties |
| 35 | +---------- |
| 36 | + |
| 37 | +15 properties are supported. All unicode values are limited to 255 characters |
| 38 | +(not bytes). |
| 39 | + |
| 40 | +author *(unicode)* |
| 41 | + Note: named 'creator' in spec. An entity primarily responsible for making |
| 42 | + the content of the resource. (Dublin Core) |
| 43 | + |
| 44 | +category *(unicode)* |
| 45 | + A categorization of the content of this package. Example values for this |
| 46 | + property might include: Resume, Letter, Financial Forecast, Proposal, |
| 47 | + Technical Presentation, and so on. (Open Packaging Conventions) |
| 48 | + |
| 49 | +comments *(unicode)* |
| 50 | + Note: named 'description' in spec. An explanation of the content of the |
| 51 | + resource. Values might include an abstract, table of contents, reference |
| 52 | + to a graphical representation of content, and a free-text account of the |
| 53 | + content. (Dublin Core) |
| 54 | + |
| 55 | +content_status *(unicode)* |
| 56 | + The status of the content. Values might include “Draft”, “Reviewed”, and |
| 57 | + “Final”. (Open Packaging Conventions) |
| 58 | + |
| 59 | +created *(datetime)* |
| 60 | + Date of creation of the resource. (Dublin Core) |
| 61 | + |
| 62 | +identifier *(unicode)* |
| 63 | + An unambiguous reference to the resource within a given context. |
| 64 | + (Dublin Core) |
| 65 | + |
| 66 | +keywords *(unicode)* |
| 67 | + A delimited set of keywords to support searching and indexing. This is |
| 68 | + typically a list of terms that are not available elsewhere in the |
| 69 | + properties. (Open Packaging Conventions) |
| 70 | + |
| 71 | +language *(unicode)* |
| 72 | + The language of the intellectual content of the resource. (Dublin Core) |
| 73 | + |
| 74 | +last_modified_by *(unicode)* |
| 75 | + The user who performed the last modification. The identification is |
| 76 | + environment-specific. Examples include a name, email address, or employee |
| 77 | + ID. It is recommended that this value be as concise as possible. |
| 78 | + (Open Packaging Conventions) |
| 79 | + |
| 80 | +last_printed *(datetime)* |
| 81 | + The date and time of the last printing. (Open Packaging Conventions) |
| 82 | + |
| 83 | +modified *(datetime)* |
| 84 | + Date on which the resource was changed. (Dublin Core) |
| 85 | + |
| 86 | +revision *(int)* |
| 87 | + The revision number. This value might indicate the number of saves or |
| 88 | + revisions, provided the application updates it after each revision. |
| 89 | + (Open Packaging Conventions) |
| 90 | + |
| 91 | +subject *(unicode)* |
| 92 | + The topic of the content of the resource. (Dublin Core) |
| 93 | + |
| 94 | +title *(unicode)* |
| 95 | + The name given to the resource. (Dublin Core) |
| 96 | + |
| 97 | +version *(unicode)* |
| 98 | + The version designator. This value is set by the user or by the |
| 99 | + application. (Open Packaging Conventions) |
| 100 | + |
| 101 | + |
| 102 | +Specimen XML |
| 103 | +------------ |
| 104 | + |
| 105 | +.. highlight:: xml |
| 106 | + |
| 107 | +core.xml produced by Microsoft Word:: |
| 108 | + |
| 109 | + <?xml version="1.0" encoding="UTF-8" standalone="yes"?> |
| 110 | + <cp:coreProperties |
| 111 | + xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" |
| 112 | + xmlns:dc="http://purl.org/dc/elements/1.1/" |
| 113 | + xmlns:dcterms="http://purl.org/dc/terms/" |
| 114 | + xmlns:dcmitype="http://purl.org/dc/dcmitype/" |
| 115 | + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
| 116 | + <dc:title>Core Document Properties Exploration</dc:title> |
| 117 | + <dc:subject>PowerPoint core document properties</dc:subject> |
| 118 | + <dc:creator>Steve Canny</dc:creator> |
| 119 | + <cp:keywords>powerpoint; open xml; dublin core; microsoft office</cp:keywords> |
| 120 | + <dc:description> |
| 121 | + One thing I'd like to discover is just how line wrapping is handled |
| 122 | + in the comments. This paragraph is all on a single |
| 123 | + line._x000d__x000d_This is a second paragraph separated from the |
| 124 | + first by two line feeds. |
| 125 | + </dc:description> |
| 126 | + <cp:lastModifiedBy>Steve Canny</cp:lastModifiedBy> |
| 127 | + <cp:revision>2</cp:revision> |
| 128 | + <dcterms:created xsi:type="dcterms:W3CDTF">2013-04-06T06:03:36Z</dcterms:created> |
| 129 | + <dcterms:modified xsi:type="dcterms:W3CDTF">2013-06-15T06:09:18Z</dcterms:modified> |
| 130 | + <cp:category>analysis</cp:category> |
| 131 | + </cp:coreProperties> |
| 132 | + |
| 133 | + |
| 134 | +Schema |
| 135 | +------ |
| 136 | + |
| 137 | +:: |
| 138 | + |
| 139 | + <xs:schema |
| 140 | + targetNamespace="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" |
| 141 | + xmlns="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" |
| 142 | + xmlns:xs="http://www.w3.org/2001/XMLSchema" |
| 143 | + xmlns:dc="http://purl.org/dc/elements/1.1/" |
| 144 | + xmlns:dcterms="http://purl.org/dc/terms/" |
| 145 | + elementFormDefault="qualified" |
| 146 | + blockDefault="#all"> |
| 147 | + |
| 148 | + <xs:import |
| 149 | + namespace="http://purl.org/dc/elements/1.1/" |
| 150 | + schemaLocation="http://dublincore.org/schemas/xmls/qdc/2003/04/02/dc.xsd"/> |
| 151 | + <xs:import |
| 152 | + namespace="http://purl.org/dc/terms/" |
| 153 | + schemaLocation="http://dublincore.org/schemas/xmls/qdc/2003/04/02/dcterms.xsd"/> |
| 154 | + <xs:import |
| 155 | + id="xml" |
| 156 | + namespace="http://www.w3.org/XML/1998/namespace"/> |
| 157 | + |
| 158 | + <xs:element name="coreProperties" type="CT_CoreProperties"/> |
| 159 | + |
| 160 | + <xs:complexType name="CT_CoreProperties"> |
| 161 | + <xs:all> |
| 162 | + <xs:element name="category" type="xs:string" minOccurs="0"/> |
| 163 | + <xs:element name="contentStatus" type="xs:string" minOccurs="0"/> |
| 164 | + <xs:element ref="dcterms:created" minOccurs="0"/> |
| 165 | + <xs:element ref="dc:creator" minOccurs="0"/> |
| 166 | + <xs:element ref="dc:description" minOccurs="0"/> |
| 167 | + <xs:element ref="dc:identifier" minOccurs="0"/> |
| 168 | + <xs:element name="keywords" type="CT_Keywords" minOccurs="0"/> |
| 169 | + <xs:element ref="dc:language" minOccurs="0"/> |
| 170 | + <xs:element name="lastModifiedBy" type="xs:string" minOccurs="0"/> |
| 171 | + <xs:element name="lastPrinted" type="xs:dateTime" minOccurs="0"/> |
| 172 | + <xs:element ref="dcterms:modified" minOccurs="0"/> |
| 173 | + <xs:element name="revision" type="xs:string" minOccurs="0"/> |
| 174 | + <xs:element ref="dc:subject" minOccurs="0"/> |
| 175 | + <xs:element ref="dc:title" minOccurs="0"/> |
| 176 | + <xs:element name="version" type="xs:string" minOccurs="0"/> |
| 177 | + </xs:all> |
| 178 | + </xs:complexType> |
| 179 | + |
| 180 | + <xs:complexType name="CT_Keywords" mixed="true"> |
| 181 | + <xs:sequence> |
| 182 | + <xs:element name="value" minOccurs="0" maxOccurs="unbounded" type="CT_Keyword"/> |
| 183 | + </xs:sequence> |
| 184 | + <xs:attribute ref="xml:lang" use="optional"/> |
| 185 | + </xs:complexType> |
| 186 | + |
| 187 | + <xs:complexType name="CT_Keyword"> |
| 188 | + <xs:simpleContent> |
| 189 | + <xs:extension base="xs:string"> |
| 190 | + <xs:attribute ref="xml:lang" use="optional"/> |
| 191 | + </xs:extension> |
| 192 | + </xs:simpleContent> |
| 193 | + </xs:complexType> |
| 194 | + |
| 195 | + </xs:schema> |
| 196 | + |
| 197 | + |
| 198 | +.. _Dublin Core: |
| 199 | + http://en.wikipedia.org/wiki/Dublin_Core |
0 commit comments