|
2054 | 2054 | <div><h2 id="scraping"><a href="#scraping" name="scraping">#</a>Scraping</h2><div><h4 id="scrapespythonsurlversionnumberandlogofromwikipediapage">Scrapes Python's URL, version number and logo from its Wikipedia page:</h4><pre><code class="python language-python hljs"><span class="hljs-comment"># $ pip3 install requests beautifulsoup4</span>
|
2055 | 2055 | <span class="hljs-keyword">import</span> requests
|
2056 | 2056 | <span class="hljs-keyword">from</span> bs4 <span class="hljs-keyword">import</span> BeautifulSoup
|
2057 |
| -url = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span> |
2058 |
| -html = requests.get(url).text |
2059 |
| -doc = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>) |
2060 |
| -table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>) |
2061 |
| -rows = table.find_all(<span class="hljs-string">'tr'</span>) |
2062 |
| -link = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>] |
2063 |
| -ver = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>] |
2064 |
| -url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>] |
2065 |
| -image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content |
2066 |
| -<span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file: |
2067 |
| - file.write(image) |
2068 |
| -print(link, ver) |
| 2057 | +URL = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span> |
| 2058 | +<span class="hljs-keyword">try</span>: |
| 2059 | + html = requests.get(URL).text |
| 2060 | + doc = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>) |
| 2061 | + table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>) |
| 2062 | + rows = table.find_all(<span class="hljs-string">'tr'</span>) |
| 2063 | + link = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>] |
| 2064 | + ver = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>] |
| 2065 | + url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>] |
| 2066 | + image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content |
| 2067 | + <span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file: |
| 2068 | + file.write(image) |
| 2069 | + print(link, ver) |
| 2070 | +<span class="hljs-keyword">except</span> requests.exceptions.ConnectionError: |
| 2071 | + print(<span class="hljs-string">"You've got problems with connection."</span>) |
2069 | 2072 | </code></pre></div></div>
|
2070 | 2073 |
|
2071 | 2074 |
|
|
0 commit comments