1
+ #include <stdlib.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include <myhtml/myhtml.h>
6
+ #include <myhtml/serialization.h>
7
+ #include <mycss/selectors/serialization.h>
8
+ #include <modest/finder/finder.h>
9
+
10
/*
 * Read all of stdin into a single heap-allocated, NUL-terminated string.
 *
 * Returns a pointer the caller must release with free(). The function never
 * returns NULL: on allocation failure or on a stdin read error it prints a
 * diagnostic to stderr and terminates the process with EXIT_FAILURE.
 *
 * Input is consumed with fgets(), so bytes following an embedded NUL within
 * a chunk are not carried over into the result.
 */
char *readeof(void)
{
    /* An enum constant is a true integer constant expression, so `chunk`
     * below is an ordinary array rather than a variable-length array. */
    enum { CHUNK_SIZE = 1024 };

    char chunk[CHUNK_SIZE];
    size_t length = 0;             /* bytes stored, excluding the terminator */
    size_t capacity = CHUNK_SIZE;  /* bytes currently allocated */
    char *content = malloc(capacity);

    if (content == NULL) {
        perror("Failed to allocate");
        exit(EXIT_FAILURE);
    }
    content[0] = '\0';

    while (fgets(chunk, sizeof chunk, stdin) != NULL) {
        size_t chunk_len = strlen(chunk);

        /* chunk_len < CHUNK_SIZE <= capacity, so doubling once always makes
         * room for the chunk plus the terminator. */
        if (length + chunk_len + 1 > capacity) {
            if (capacity > (size_t)-1 / 2) {
                fprintf(stderr, "Failed to allocate: input too large\n");
                free(content);
                exit(EXIT_FAILURE);
            }

            size_t grown_capacity = capacity * 2;
            char *grown = realloc(content, grown_capacity);
            if (grown == NULL) {
                perror("Failed to allocate");
                free(content);
                exit(EXIT_FAILURE);
            }
            content = grown;
            capacity = grown_capacity;
        }

        /* Append at the known end instead of re-scanning with strcat();
         * the +1 copies the chunk's own terminator as well. */
        memcpy(content + length, chunk, chunk_len + 1);
        length += chunk_len;
    }

    /* fgets() returns NULL for both EOF and errors; tell them apart. */
    if (ferror(stdin)) {
        perror("Failed to read input");
        free(content);
        exit(EXIT_FAILURE);
    }

    return content;
}
33
+
34
/*
 * Serialization callback: write exactly `len` bytes of `data` to stdout.
 *
 * fwrite() is used rather than printf("%.*s") so the length is not narrowed
 * to int and the output does not stop early at an embedded NUL byte.
 *
 * `ctx` is unused. Always returns 0; a short write is not reported.
 */
unsigned int serializer_log(const char *data, size_t len, void *ctx)
{
    (void)ctx;

    if (len > 0) {
        fwrite(data, 1, len, stdout);
    }
    return 0;
}
38
+
39
/*
 * Handle a single command-line option, passed without its leading dashes.
 *
 * Only "help" and "h" are recognised: the usage text is written to stderr
 * (with `progname` substituted into the usage line) and the process exits
 * with EXIT_SUCCESS. Any other option name is ignored and the function
 * simply returns.
 */
void opthandler(const char *arg, const char *progname)
{
    /* Everything printed after the banner and the usage line. */
    static const char *const help_body[] = {
        "Options:\n",
        "-h, --help\tshow this text\n",
        "\n",
        "<selector>\tselector to match\n",
        "<mode>\t\tprocessing mode\n",
        "\t\tmay be one of { data, text, attr }:\n",
        "\t\tdata - return raw html of matching elements\n",
        "\t\ttext - return inner text of matching elements\n",
        "\t\tattr - return attribute value X of matching elements\n",
        "\t\t\t[mode argument] - attribute to return\n",
    };
    const size_t help_body_count = sizeof help_body / sizeof help_body[0];

    if (strcmp(arg, "help") != 0 && strcmp(arg, "h") != 0) {
        return;
    }

    fputs("hq (html query) - commandline HTML processor © Robin Broda, 2018\n", stderr);
    fprintf(stderr, "Usage: %s [options] <selector> <mode> [mode argument]\n\n", progname);
    for (size_t line = 0; line < help_body_count; line++) {
        fputs(help_body[line], stderr);
    }

    exit(EXIT_SUCCESS);
}
56
+
57
+ int main (int argc , const char * argv []){
58
+ if (argc == 1 ) opthandler ("help" , argv [0 ]);
59
+
60
+ size_t shifts = 0 ; // offset of new argv
61
+ while (argc > 1 ){
62
+ if (argv [1 ][0 ] == '-' ){
63
+ const char * arg = argv [1 ];
64
+ if (arg [1 ] == '-' ){
65
+ const char * longarg = arg + 2 ;
66
+ opthandler (longarg , 0 [argv - shifts ]);
67
+ }else {
68
+ for (size_t i = 1 ; i < strlen (arg ); i ++ ){
69
+ const char shortarg [2 ] = { arg [i ], '\0' };
70
+ opthandler (shortarg , 0 [argv - shifts ]);
71
+ }
72
+ }
73
+ shifts ++ ;
74
+ argv ++ ;
75
+ argc -- ;
76
+ }else {
77
+ argv [0 ] = 0 [argv - shifts ]; // restore argv[0]
78
+ break ;
79
+ }
80
+ }
81
+
82
+ const char * selector ;
83
+ if (argc > 1 ){
84
+ selector = argv [1 ];
85
+ }else {
86
+ fprintf (stderr , "No selector given\n" );
87
+ exit (EXIT_FAILURE );
88
+ }
89
+
90
+ const char * mode ;
91
+ if (argc > 2 ){
92
+ mode = argv [2 ];
93
+ }else {
94
+ fprintf (stderr , "No mode given\n" );
95
+ exit (EXIT_FAILURE );
96
+ }
97
+
98
+ char * input = readeof ();
99
+
100
+ myhtml_t * myhtml = myhtml_create ();
101
+ mystatus_t mystatus = myhtml_init (myhtml , MyHTML_OPTIONS_DEFAULT , 1 , 0 );
102
+ if (mystatus ){
103
+ fprintf (stderr , "Failed to init MyHTML\n" );
104
+ exit (EXIT_FAILURE );
105
+ }
106
+
107
+ myhtml_tree_t * html_tree = myhtml_tree_create ();
108
+ mystatus = myhtml_tree_init (html_tree , myhtml );
109
+ if (mystatus ){
110
+ fprintf (stderr , "Failed to init MyHTML tree\n" );
111
+ exit (EXIT_FAILURE );
112
+ }
113
+
114
+ mystatus = myhtml_parse (html_tree , MyENCODING_UTF_8 , input , strlen (input ));
115
+ if (mystatus ){
116
+ fprintf (stderr , "Failed to parse HTML\n" );
117
+ exit (EXIT_FAILURE );
118
+ }
119
+
120
+ mycss_t * mycss = mycss_create ();
121
+ mystatus = mycss_init (mycss );
122
+ if (mystatus ){
123
+ fprintf (stderr , "Failed to init MyCSS\n" );
124
+ exit (EXIT_FAILURE );
125
+ }
126
+
127
+ mycss_entry_t * css_entry = mycss_entry_create ();
128
+ mystatus = mycss_entry_init (mycss , css_entry );
129
+ if (mystatus ){
130
+ fprintf (stderr , "Failed to init MyCSS entry\n" );
131
+ exit (EXIT_FAILURE );
132
+ }
133
+
134
+ modest_finder_t * finder = modest_finder_create_simple ();
135
+
136
+ mycss_selectors_list_t * selectors_list = mycss_selectors_parse (
137
+ mycss_entry_selectors (css_entry ),
138
+ MyENCODING_UTF_8 ,
139
+ selector , strlen (selector ), & mystatus
140
+ );
141
+
142
+ if (selectors_list == NULL || (selectors_list -> flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD )){
143
+ fprintf (stderr , "Bad selector\n" );
144
+ exit (EXIT_FAILURE );
145
+ }
146
+
147
+ myhtml_collection_t * collection = NULL ;
148
+ modest_finder_by_selectors_list (finder , html_tree -> node_html , selectors_list , & collection );
149
+
150
+ if (collection ){
151
+ for (size_t i = 0 ; i < collection -> length ; i ++ ){
152
+ if (!strcmp (mode , "text" )){
153
+ myhtml_serialization_tree_callback (collection -> list [i ]-> child , serializer_log , NULL );
154
+ printf ("\n" );
155
+ }else if (!strcmp (mode , "data" )){
156
+ myhtml_serialization_tree_callback (collection -> list [i ], serializer_log , NULL );
157
+ printf ("\n" );
158
+ }else if (!strcmp (mode , "attr" )){
159
+ const char * attr_name ;
160
+ if (argc > 3 ){
161
+ attr_name = argv [3 ];
162
+ }else {
163
+ fprintf (stderr , "No attr name given" );
164
+ exit (EXIT_FAILURE );
165
+ }
166
+ myhtml_tree_node_t * node = collection -> list [i ];
167
+ myhtml_token_node_t * token = node -> token ;
168
+ if (token == NULL ) continue ;
169
+ myhtml_token_attr_t * attr = token -> attr_first ;
170
+ if (attr == NULL ) continue ;
171
+
172
+ do {
173
+ if (!strcmp (attr_name , mycore_string_data (& attr -> key ))){
174
+ printf ("%s\n" , mycore_string_data (& attr -> value ));
175
+ }
176
+ if (attr != token -> attr_last ) attr = attr -> next ;
177
+ }while (attr != token -> attr_last );
178
+ }else {
179
+ fprintf (stderr , "invalid mode: '%s'\n" , mode );
180
+ exit (EXIT_FAILURE );
181
+ }
182
+ }
183
+ }
184
+
185
+ // cleanup
186
+ myhtml_collection_destroy (collection );
187
+ mycss_selectors_list_destroy (mycss_entry_selectors (css_entry ), selectors_list , true);
188
+ modest_finder_destroy (finder , true);
189
+ mycss_destroy (css_entry -> mycss , true);
190
+ mycss_entry_destroy (css_entry , true);
191
+ myhtml_destroy (html_tree -> myhtml );
192
+ myhtml_tree_destroy (html_tree );
193
+ free (input );
194
+ return 0 ;
195
+ }
0 commit comments