Skip to content

Commit de1b9d9

Browse files
committed
Init
0 parents  commit de1b9d9

File tree

3 files changed

+270
-0
lines changed

3 files changed

+270
-0
lines changed

README.md

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# hq
2+
3+
An HTML processor inspired by jq (https://github.com/stedolan/jq)
4+
5+
## Building & Usage
6+
7+
### Building
8+
9+
#### Dependencies
10+
- meson
11+
- modest (https://github.com/lexborisov/Modest)
12+
13+
#### Build
14+
`meson build && ninja -C build`
15+
16+
The executable will be built to `build/hq`.
17+
18+
### Usage
19+
20+
#### Dependencies
21+
- modest (https://github.com/lexborisov/Modest)
22+
23+
#### Use
24+
25+
Application help text:
26+
```
27+
hq (html query) - commandline HTML processor © Robin Broda, 2018
28+
Usage: build/hq [options] <selector> <mode> [mode argument]
29+
30+
Options:
31+
-h, --help show this text
32+
33+
<selector> selector to match
34+
<mode> processing mode
35+
may be one of { data, text, attr }:
36+
data - return raw html of matching elements
37+
text - return inner text of matching elements
38+
attr - return attribute value X of matching elements
39+
[mode argument] - attribute to return
40+
```
41+
42+
Example usage:
43+
44+
`curl -s https://coderobe.net | hq a data`
45+
```
46+
<a href="https://keybase.io/coderobe">Keybase (coderobe)</a>
47+
<a href="https://github.com/coderobe">Github (coderobe)</a>
48+
<a href="https://twitter.com/coderobe">Twitter (coderobe)</a>
49+
```
50+
51+
52+
`curl -s https://coderobe.net | hq a text`
53+
```
54+
Keybase (coderobe)
55+
Github (coderobe)
56+
Twitter (coderobe)
57+
```
58+
59+
`curl -s https://coderobe.net | hq a attr href`
60+
```
61+
https://keybase.io/coderobe
62+
https://github.com/coderobe
63+
https://twitter.com/coderobe
64+
```
65+
66+
You get the idea.
67+
68+
## License
69+
70+
This work, written by Robin Broda (coderobe) in 2018, is licensed under the terms of the GNU Affero General Public License v3.0

main.c

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#include <stdlib.h>
2+
#include <stdio.h>
3+
#include <string.h>
4+
5+
#include <myhtml/myhtml.h>
6+
#include <myhtml/serialization.h>
7+
#include <mycss/selectors/serialization.h>
8+
#include <modest/finder/finder.h>
9+
10+
/*
 * Read everything available on stdin into one heap-allocated,
 * NUL-terminated string.
 *
 * Input is pulled in with fgets() in fixed-size chunks and appended to a
 * buffer that doubles whenever it runs out of room. The current length is
 * tracked so each append is a single memcpy() instead of a strcat() that
 * would rescan the whole buffer for every chunk.
 *
 * Returns a pointer the caller must free(); never returns NULL. On an
 * allocation failure or a read error a message is printed to stderr and the
 * process exits with EXIT_FAILURE.
 */
char* readeof(void){
  enum { chunk_size = 1024 };
  char chunk[chunk_size];
  size_t capacity = chunk_size; // bytes currently allocated for content
  size_t length = 0;            // bytes used, not counting the trailing \0
  char* content = malloc(capacity);
  if(content == NULL){
    perror("Failed to allocate");
    exit(EXIT_FAILURE);
  }
  content[0] = '\0';

  while(fgets(chunk, chunk_size, stdin)){
    size_t chunk_len = strlen(chunk);
    // need room for the chunk plus the terminator
    if(chunk_len >= capacity - length){
      // chunk_len < chunk_size <= capacity, so one doubling always suffices
      if(capacity > (size_t)-1 / 2){
        fprintf(stderr, "Failed to allocate: input too large\n");
        free(content);
        exit(EXIT_FAILURE);
      }
      char* grown = realloc(content, capacity * 2);
      if(grown == NULL){
        perror("Failed to allocate");
        free(content);
        exit(EXIT_FAILURE);
      }
      content = grown;
      capacity *= 2;
    }
    memcpy(content + length, chunk, chunk_len + 1); // copies the \0 as well
    length += chunk_len;
  }

  // fgets() returns NULL for both EOF and errors; tell them apart
  if(ferror(stdin)){
    perror("Failed to read input");
    free(content);
    exit(EXIT_FAILURE);
  }
  return content;
}
33+
34+
/*
 * Serialization callback: write exactly `len` bytes of `data` to stdout.
 *
 * fwrite() is used rather than printf("%.*s") so that the size_t length is
 * not narrowed to int and bytes after an embedded NUL are not dropped.
 *
 * data - start of the buffer to emit (not required to be NUL-terminated)
 * len  - number of bytes to emit
 * ctx  - caller context pointer; unused here
 *
 * Always returns 0.
 */
unsigned int serializer_log(const char* data, size_t len, void* ctx){
  (void)ctx; // unused
  if(len > 0){
    fwrite(data, 1, len, stdout);
  }
  return 0;
}
38+
39+
/*
 * Handle a single command-line option, given without its leading dashes.
 *
 * arg      - option name; "help" and "h" are the only ones acted upon
 * progname - program name substituted into the usage line
 *
 * For "help"/"h" the usage text is written to stderr and the process exits
 * with EXIT_SUCCESS. Any other option name is ignored and the function
 * simply returns.
 */
void opthandler(const char* arg, const char* progname){
  int wants_help = strcmp(arg, "help") == 0 || strcmp(arg, "h") == 0;
  if(!wants_help){
    return;
  }

  // one call, same bytes: adjacent literals are concatenated by the compiler
  fprintf(stderr,
    "hq (html query) - commandline HTML processor © Robin Broda, 2018\n"
    "Usage: %s [options] <selector> <mode> [mode argument]\n\n"
    "Options:\n"
    "-h, --help\tshow this text\n"
    "\n"
    "<selector>\tselector to match\n"
    "<mode>\t\tprocessing mode\n"
    "\t\tmay be one of { data, text, attr }:\n"
    "\t\tdata - return raw html of matching elements\n"
    "\t\ttext - return inner text of matching elements\n"
    "\t\tattr - return attribute value X of matching elements\n"
    "\t\t\t[mode argument] - attribute to return\n",
    progname);
  exit(EXIT_SUCCESS);
}
56+
57+
int main(int argc, const char* argv[]){
58+
if(argc == 1) opthandler("help", argv[0]);
59+
60+
size_t shifts = 0; // offset of new argv
61+
while(argc > 1){
62+
if(argv[1][0] == '-'){
63+
const char* arg = argv[1];
64+
if(arg[1] == '-'){
65+
const char* longarg = arg+2;
66+
opthandler(longarg, 0[argv-shifts]);
67+
}else{
68+
for(size_t i = 1; i < strlen(arg); i++){
69+
const char shortarg[2] = { arg[i], '\0' };
70+
opthandler(shortarg, 0[argv-shifts]);
71+
}
72+
}
73+
shifts++;
74+
argv++;
75+
argc--;
76+
}else{
77+
argv[0] = 0[argv-shifts]; // restore argv[0]
78+
break;
79+
}
80+
}
81+
82+
const char* selector;
83+
if(argc > 1){
84+
selector = argv[1];
85+
}else{
86+
fprintf(stderr, "No selector given\n");
87+
exit(EXIT_FAILURE);
88+
}
89+
90+
const char* mode;
91+
if(argc > 2){
92+
mode = argv[2];
93+
}else{
94+
fprintf(stderr, "No mode given\n");
95+
exit(EXIT_FAILURE);
96+
}
97+
98+
char* input = readeof();
99+
100+
myhtml_t* myhtml = myhtml_create();
101+
mystatus_t mystatus = myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0);
102+
if(mystatus){
103+
fprintf(stderr, "Failed to init MyHTML\n");
104+
exit(EXIT_FAILURE);
105+
}
106+
107+
myhtml_tree_t* html_tree = myhtml_tree_create();
108+
mystatus = myhtml_tree_init(html_tree, myhtml);
109+
if(mystatus){
110+
fprintf(stderr, "Failed to init MyHTML tree\n");
111+
exit(EXIT_FAILURE);
112+
}
113+
114+
mystatus = myhtml_parse(html_tree, MyENCODING_UTF_8, input, strlen(input));
115+
if(mystatus){
116+
fprintf(stderr, "Failed to parse HTML\n");
117+
exit(EXIT_FAILURE);
118+
}
119+
120+
mycss_t* mycss = mycss_create();
121+
mystatus = mycss_init(mycss);
122+
if(mystatus){
123+
fprintf(stderr, "Failed to init MyCSS\n");
124+
exit(EXIT_FAILURE);
125+
}
126+
127+
mycss_entry_t* css_entry = mycss_entry_create();
128+
mystatus = mycss_entry_init(mycss, css_entry);
129+
if(mystatus){
130+
fprintf(stderr, "Failed to init MyCSS entry\n");
131+
exit(EXIT_FAILURE);
132+
}
133+
134+
modest_finder_t* finder = modest_finder_create_simple();
135+
136+
mycss_selectors_list_t* selectors_list = mycss_selectors_parse(
137+
mycss_entry_selectors(css_entry),
138+
MyENCODING_UTF_8,
139+
selector, strlen(selector), &mystatus
140+
);
141+
142+
if(selectors_list == NULL || (selectors_list->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD)){
143+
fprintf(stderr, "Bad selector\n");
144+
exit(EXIT_FAILURE);
145+
}
146+
147+
myhtml_collection_t* collection = NULL;
148+
modest_finder_by_selectors_list(finder, html_tree->node_html, selectors_list, &collection);
149+
150+
if(collection){
151+
for(size_t i = 0; i < collection->length; i++){
152+
if(!strcmp(mode, "text")){
153+
myhtml_serialization_tree_callback(collection->list[i]->child, serializer_log, NULL);
154+
printf("\n");
155+
}else if(!strcmp(mode, "data")){
156+
myhtml_serialization_tree_callback(collection->list[i], serializer_log, NULL);
157+
printf("\n");
158+
}else if(!strcmp(mode, "attr")){
159+
const char* attr_name;
160+
if(argc > 3){
161+
attr_name = argv[3];
162+
}else{
163+
fprintf(stderr, "No attr name given");
164+
exit(EXIT_FAILURE);
165+
}
166+
myhtml_tree_node_t* node = collection->list[i];
167+
myhtml_token_node_t* token = node->token;
168+
if(token == NULL) continue;
169+
myhtml_token_attr_t* attr = token->attr_first;
170+
if(attr == NULL) continue;
171+
172+
do{
173+
if(!strcmp(attr_name, mycore_string_data(&attr->key))){
174+
printf("%s\n", mycore_string_data(&attr->value));
175+
}
176+
if(attr != token->attr_last) attr = attr->next;
177+
}while(attr != token->attr_last);
178+
}else{
179+
fprintf(stderr, "invalid mode: '%s'\n", mode);
180+
exit(EXIT_FAILURE);
181+
}
182+
}
183+
}
184+
185+
// cleanup
186+
myhtml_collection_destroy(collection);
187+
mycss_selectors_list_destroy(mycss_entry_selectors(css_entry), selectors_list, true);
188+
modest_finder_destroy(finder, true);
189+
mycss_destroy(css_entry->mycss, true);
190+
mycss_entry_destroy(css_entry, true);
191+
myhtml_destroy(html_tree->myhtml);
192+
myhtml_tree_destroy(html_tree);
193+
free(input);
194+
return 0;
195+
}

meson.build

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Build definition for hq: one C executable built from main.c.
project('hq', 'c')

# External library looked up under the name 'modest'.
modest_dep = dependency('modest')

executable(
  'hq',
  'main.c',
  dependencies: [modest_dep]
)

0 commit comments

Comments
 (0)