Skip to content

Commit 689e15c

Browse files
committed
refactor: #11 parse confluence index.html tree
1 parent 675e61d commit 689e15c

File tree

2 files changed

+48
-25
lines changed

2 files changed

+48
-25
lines changed

app/Commands/WikiImportCommand.php

Lines changed: 6 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -160,36 +160,17 @@ private function handleConfluenceHtml(): int
160160
$this->info('空间名称:' . $space['name']);
161161
$this->info('空间标识:' . $space['key']);
162162

163-
$divElements = $this->document->getElementById('content')->getElementsByTagName('div');
164-
$divElement = null;
165-
foreach ($divElements as $divElement) {
166-
if ($divElement->getAttribute('class') != 'pageSection') {
167-
continue;
168-
}
169-
$h2Element = $divElement->getElementsByTagName('h2')[0];
170-
if (!empty($h2Element) && $h2Element->nodeValue == 'Available Pages:') {
171-
break;
172-
}
173-
}
174-
if (empty($divElement)) {
163+
$pages = $this->confluence->parseAvailablePages($this->document);
164+
if (empty($pages['tree'])) {
175165
$this->info("未发现有效数据");
176166
return 0;
177-
}
178-
$xpath = new \DOMXPath($this->document);
179-
$firstLevelLiElements = $xpath->query('ul/li', $divElement);
180-
$this->info("发现 {$firstLevelLiElements->count()} 个一级页面");
181-
if ($firstLevelLiElements->count() == 0) {
182-
return 0;
167+
} else {
168+
$this->info('发现 ' . count($pages['tree']) . ' 个一级页面');
183169
}
184170

185171
$this->info("开始导入 CODING:");
186-
$pageTitles = [];
187-
foreach ($firstLevelLiElements as $firstLevelLiElement) {
188-
$aElement = $xpath->query('a', $firstLevelLiElement)->item(0);
189-
$pageTitles[$aElement->getAttribute('href')] = $aElement->nodeValue;
190-
}
191-
foreach ($pageTitles as $page => $title) {
192-
$this->info('标题:' . $title);
172+
foreach ($pages['tree'] as $page) {
173+
$this->info('标题:' . $pages['titles'][$page]);
193174
$markdown = $this->confluence->htmlFile2Markdown($dataPath . $page);
194175
$mdFilename = substr($page, 0, -5) . '.md';
195176
$zipFilePath = $this->coding->createMarkdownZip($markdown, $dataPath, $mdFilename);

app/Confluence.php

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
namespace App;
44

5+
use JetBrains\PhpStorm\ArrayShape;
56
use League\HTMLToMarkdown\HtmlConverter;
7+
use phpDocumentor\Reflection\Types\Array_;
68

79
class Confluence
810
{
@@ -38,4 +40,44 @@ public function htmlFile2Markdown(string $filename)
3840
$html = $this->document->saveHTML($this->document->getElementById('main-content'));
3941
return $this->htmlConverter->convert($html);
4042
}
43+
44+
/**
 * Extract the first-level pages listed under the "Available Pages:" section
 * of a Confluence HTML export index document.
 *
 * The section is the first <div class="pageSection"> inside the element with
 * id "content" whose first <h2> reads exactly "Available Pages:". Only the
 * <li> items of a <ul> that is a direct child of that div are returned;
 * nested child pages are not collected.
 *
 * @param \DOMDocument $document Parsed index document of the export.
 * @return array{tree: list<string>, titles: array<string, string>}
 *         'tree' holds the page hrefs in document order; 'titles' maps each
 *         href to its link text. Both are empty when the section (or the
 *         "content" element) is missing or lists no pages.
 * @todo This method receives the document as an argument while
 *       htmlFile2Markdown() works on $this->document; make them consistent.
 */
public function parseAvailablePages(\DOMDocument $document): array
{
    $pages = [
        'tree' => [],
        'titles' => [],
    ];

    // A document without the "content" container has nothing to import;
    // bail out instead of calling a method on null.
    $contentElement = $document->getElementById('content');
    if ($contentElement === null) {
        return $pages;
    }

    // Track the match in its own variable: a foreach loop variable keeps the
    // last visited element after the loop ends, so it cannot be used to tell
    // "found" apart from "not found".
    $pageSection = null;
    foreach ($contentElement->getElementsByTagName('div') as $divElement) {
        if ($divElement->getAttribute('class') !== 'pageSection') {
            continue;
        }
        $h2Element = $divElement->getElementsByTagName('h2')->item(0);
        if ($h2Element !== null && $h2Element->nodeValue === 'Available Pages:') {
            $pageSection = $divElement;
            break;
        }
    }
    if ($pageSection === null) {
        return $pages;
    }

    $xpath = new \DOMXPath($document);
    // Relative query: only <li> items of a <ul> directly under the section.
    $firstLevelLiElements = $xpath->query('ul/li', $pageSection);
    if ($firstLevelLiElements === false) {
        return $pages;
    }

    foreach ($firstLevelLiElements as $firstLevelLiElement) {
        $aElement = $xpath->query('a', $firstLevelLiElement)->item(0);
        // Skip list items that carry no direct link instead of failing on null.
        if (!$aElement instanceof \DOMElement) {
            continue;
        }
        $href = $aElement->getAttribute('href');
        $pages['tree'][] = $href;
        $pages['titles'][$href] = $aElement->nodeValue;
    }

    return $pages;
}
4183
}

0 commit comments

Comments
 (0)