|
2140 | 2140 | <li><strong><code class="python hljs"><span class="hljs-string">'<str>'</span></code> - Max age as a string: <code class="python hljs"><span class="hljs-string">'1 week, 3 days'</span></code>, <code class="python hljs"><span class="hljs-string">'2 months'</span></code>, …</strong></li>
|
2141 | 2141 | </ul>
|
2142 | 2142 | <div><h2 id="scraping"><a href="#scraping" name="scraping">#</a>Scraping</h2><div><h4 id="scrapespythonsurlversionnumberandlogofromitswikipediapage">Scrapes Python's URL, version number and logo from its Wikipedia page:</h4><pre><code class="python language-python hljs"><span class="hljs-comment"># $ pip3 install requests beautifulsoup4</span>
|
2143 |
| -<span class="hljs-keyword">import</span> requests, sys |
2144 |
| -<span class="hljs-keyword">from</span> bs4 <span class="hljs-keyword">import</span> BeautifulSoup |
| 2143 | +<span class="hljs-keyword">import</span> requests, bs4, sys |
2145 | 2144 | URL = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span>
|
2146 | 2145 | <span class="hljs-keyword">try</span>:
|
2147 | 2146 | html = requests.get(URL).text
|
2148 |
| - doc = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>) |
| 2147 | + doc = bs4.BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>) |
2149 | 2148 | table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>)
|
2150 |
| - rows = table.find_all(<span class="hljs-string">'tr'</span>) |
2151 |
| - link = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>] |
2152 |
| - ver = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>] |
2153 |
| - url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>] |
| 2149 | + link = table.find(<span class="hljs-string">'th'</span>, text=<span class="hljs-string">'Website'</span>).next_sibling.a[<span class="hljs-string">'href'</span>] |
| 2150 | + ver = table.find(<span class="hljs-string">'th'</span>, text=<span class="hljs-string">'Stable release'</span>).next_sibling.strings.__next__() |
| 2151 | + url_i = table.find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>] |
2154 | 2152 | image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content
|
2155 | 2153 | <span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file:
|
2156 | 2154 | file.write(image)
|
|
0 commit comments