00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "defs.h"
00022 #include "ssi.h"
00023
00024
00025
00026
00027
00028
00029
/* Capacity limits for the table of hyperlinks collected from a page. */
#define MAX_LINK 100        /* bytes per link text, terminator included */
#define MAX_HREF 120        /* bytes per link target, terminator included */
#define MAX_ANCHORS 200     /* number of table slots */

/* One hyperlink: the text between <A> and </A>, and its HREF target. */
struct anchor_rec {
    char a_link[MAX_LINK];
    char a_href[MAX_HREF];
};

/* Table of hyperlinks; slot [nanchors] is the one currently being built. */
static struct anchor_rec anchor[MAX_ANCHORS];

/* Count of completed entries at the front of anchor[]. */
static int nanchors = 0;
00039
00040 static void add_href(const char *tag)
00041 {
00042 const char *p, *href = NULL;
00043 char delim = '\0';
00044 int nhref;
00045
00046 if (nanchors < MAX_ANCHORS) {
00047 anchor[nanchors].a_href[0] = '\0';
00048 anchor[nanchors].a_link[0] = '\0';
00049 for (p = tag; *p && *p != delim; p++)
00050 if (href == NULL && strncasecmp(p, "HREF", 4) == 0) {
00051 p += 4;
00052 while (isspace((int)*p))
00053 p++;
00054 if (*p == '=') {
00055 p++;
00056 while (isspace((int)*p))
00057 p++;
00058 if (*p == '"' || *p == '\'')
00059 delim = *p;
00060 else
00061 p--;
00062 href = p + 1;
00063 }
00064 }
00065 if (href != NULL) {
00066 nhref = p - href;
00067 if (nhref > MAX_HREF - 1)
00068 nhref = MAX_HREF - 1;
00069 strncpy(anchor[nanchors].a_href, href, nhref);
00070 anchor[nanchors].a_href[nhref] = '\0';
00071 }
00072 }
00073 }
00074
00075 static void add_link(const char *link)
00076 {
00077 int nlink;
00078
00079 if (nanchors < MAX_ANCHORS && anchor[nanchors].a_href[0] != '\0') {
00080 nlink = strlen(link);
00081 if (nlink > MAX_LINK - 1)
00082 nlink = MAX_LINK - 1;
00083 strncpy(anchor[nanchors].a_link, link, nlink);
00084 anchor[nanchors].a_link[nlink] = '\0';
00085 nanchors++;
00086 }
00087 }
00088
00089 static const char *go_link(int limit)
00090 {
00091 int i;
00092 long n;
00093 static char buf[MAX_HREF];
00094
00095 if (limit <= 0 || limit > nanchors)
00096 limit = nanchors;
00097 for (i = 0; i < limit; i++)
00098 printf("%d:\t%-20.20s --> %s\n", i + 1, anchor[i].a_link,
00099 anchor[i].a_href);
00100 if (limit < nanchors)
00101 printf("\t(%d more links not shown)\n", nanchors - limit);
00102 while (true) {
00103 printf("Link number, URL, or Ctrl-D to quit]? ");
00104 if (fgets(buf, sizeof(buf), stdin) == NULL || buf[0] == 'q')
00105 return NULL;
00106 for (i = 0; buf[i]; i++)
00107 if (!isspace((int)buf[i]) && !isdigit((int)buf[i])) {
00108 i = strlen(buf);
00109 if (i > 0 && buf[i - 1] == '\n')
00110 buf[i - 1] = '\0';
00111 return buf;
00112 }
00113 n = strtol(buf, NULL, 10);
00114 if (n > 0 && n <= limit)
00115 return anchor[n - 1].a_href;
00116 printf("Invalid number\n");
00117 }
00118 }
00119
/* Copy src into dst, truncating so that dst (of max_dst bytes) is always
   NUL-terminated; does nothing when max_dst is 0. */
static void parse_url_copy(char *dst, size_t max_dst, const char *src)
{
    size_t n;

    if (max_dst == 0)
        return;
    n = strlen(src);
    if (n > max_dst - 1)
        n = max_dst - 1;
    memcpy(dst, src, n);
    dst[n] = '\0';
}

/*
 * Split a URL into host and path.
 *
 * url       - text to parse.
 * host      - receives the host part (everything between the scheme
 *             and the first '/'); max_host is its size in bytes.
 * path      - receives the path part; max_path is its size in bytes.
 *
 * If url begins with "http://" or "https://" (any letter case), host
 * is set and path becomes the text from the first '/' onward, or "/"
 * if there is none.  Otherwise the whole of url is copied to path and
 * host is left untouched, so a link without a scheme keeps the host
 * the caller already had.  Both outputs are truncated to fit and are
 * always NUL-terminated.
 */
static void parse_url(const char *url, char *host, size_t max_host,
  char *path, size_t max_path)
{
    const char *slash;
    size_t host_len;

    /* The two schemes are alternatives: strip at most one of them. */
    if (strncasecmp(url, "http://", 7) == 0)
        url += 7;
    else if (strncasecmp(url, "https://", 8) == 0)
        url += 8;
    else {
        parse_url_copy(path, max_path, url);
        return;
    }

    slash = strchr(url, '/');
    if (slash == NULL) {
        slash = url + strlen(url);
        parse_url_copy(path, max_path, "/");
    }
    else
        parse_url_copy(path, max_path, slash);

    if (max_host > 0) {
        host_len = (size_t)(slash - url);
        if (host_len > max_host - 1)
            host_len = max_host - 1;
        memcpy(host, url, host_len);
        host[host_len] = '\0';
    }
}
00156
/* Append tail to the string in path without letting the result exceed
   max_path bytes, terminator included. */
static void makepath_append(char *path, size_t max_path, const char *tail)
{
    size_t used = strlen(path);

    /* strncat's count is the number of characters to append, not the
       size of the destination, so pass only the space that is left. */
    if (used + 1 < max_path)
        strncat(path, tail, max_path - used - 1);
}

/*
 * Combine the current path with a newly requested one, in place.
 *
 * path     - current path on entry (NUL-terminated), new path on exit.
 * max_path - size of the path buffer in bytes.
 * newpath  - requested path.
 *
 * A newpath starting with '/' replaces path outright.  Otherwise it is
 * taken relative to path: if the last component of path contains a '.'
 * it is treated as a file name and removed, a '/' separator is added
 * when needed, and newpath is appended.  The result is truncated to
 * fit and is always NUL-terminated.
 */
static void makepath(char *path, size_t max_path, const char *newpath)
{
    char *filename;
    size_t len;

    if (max_path == 0)
        return;

    if (newpath[0] == '/') {
        len = strlen(newpath);
        if (len > max_path - 1)
            len = max_path - 1;
        memcpy(path, newpath, len);
        path[len] = '\0';
        return;
    }

    filename = strrchr(path, '/');
    filename = (filename == NULL) ? path : filename + 1;
    if (strchr(filename, '.') != NULL)
        *filename = '\0';

    /* An empty path has no last character to inspect. */
    len = strlen(path);
    if (len == 0 || path[len - 1] != '/')
        makepath_append(path, max_path, "/");
    makepath_append(path, max_path, newpath);
}
00177
00178 static void display(const char *s, int len, bool showtags,
00179 bool want_display)
00180 {
00181 static enum {DSP_NORMAL, DSP_INTAG, DSP_INENTITY} state = DSP_NORMAL;
00182 static int taglen, ntag = 0, col = 0, nlink = 0;
00183 static char tag[100], link[MAX_LINK];
00184 static bool inlink = false, inscript = false;
00185
00186 int i, c;
00187
00188 for (i = 0; i < len; i++) {
00189 c = s[i] & 0177;
00190
00191 if (c == '\r') {
00192 #if 0
00193 if (want_display)
00194 putc('@', stdout);
00195 #endif
00196 continue;
00197 }
00198 #if 0
00199 if (c == '\n') {
00200 if (want_display)
00201 putc('#', stdout);
00202 }
00203 #endif
00204 switch (state) {
00205 case DSP_NORMAL:
00206 if (c == '<') {
00207 state = DSP_INTAG;
00208 break;
00209 }
00210 if (inscript)
00211 break;
00212 switch (c) {
00213 case '\n':
00214 if (col > 0) {
00215 if (want_display)
00216 putc('\n', stdout);
00217 col = 0;
00218 }
00219 break;
00220 case '&':
00221 state = DSP_INENTITY;
00222 break;
00223 default:
00224 if (inlink && nlink < sizeof(link) - 1)
00225 link[nlink++] = (char)c;
00226 if (col >= 80) {
00227 if (want_display)
00228 putc('\n', stdout);
00229 col = 0;
00230 }
00231 if (col > 0 || !isspace(c)) {
00232 col++;
00233 if (want_display)
00234 putc(c, stdout);
00235 }
00236 }
00237 break;
00238 case DSP_INTAG:
00239 if (c == '>') {
00240 char tagname[20];
00241
00242 tag[ntag] = '\0';
00243 if (showtags)
00244 printf("<%s>\n", tag);
00245 ntag = 0;
00246 state = DSP_NORMAL;
00247 taglen = strcspn(tag, " \t\n");
00248 if (taglen > sizeof(tagname) - 1)
00249 taglen = sizeof(tagname) - 1;
00250 strncpy(tagname, tag, taglen);
00251 tagname[taglen] = '\0';
00252 if (strcasecmp(tagname, "P") == 0) {
00253 if (want_display)
00254 fputs("\n\n", stdout);
00255 col = 0;
00256 }
00257 else if (strcasecmp(tagname, "BR") == 0) {
00258 if (want_display)
00259 fputs("\n", stdout);
00260 col = 0;
00261 }
00262 else if (strcasecmp(tagname, "A") == 0) {
00263 inlink = true;
00264 nlink = 0;
00265 add_href(tag);
00266 }
00267 else if (inlink && strcasecmp(tagname, "/A") == 0) {
00268 link[nlink] = '\0';
00269 add_link(link);
00270 inlink = false;
00271 }
00272 else if (strcasecmp(tagname, "SCRIPT") == 0)
00273 inscript = true;
00274 else if (inscript && strcasecmp(tagname, "/SCRIPT") == 0)
00275 inscript = false;
00276 }
00277 else if ((!isspace(c) || ntag > 0) && ntag < sizeof(tag) - 1)
00278 tag[ntag++] = c;
00279 break;
00280 case DSP_INENTITY:
00281 if (c == ';') {
00282 state = DSP_NORMAL;
00283 if (want_display)
00284 putc('?', stdout);
00285 }
00286 break;
00287 default:
00288 fprintf(stderr, __FILE__ ":" __func__ " -- bad state\n");
00289 exit(EXIT_FAILURE);
00290 }
00291 }
00292 }
00293
/* Names of the HTTP response header fields that response() looks at. */
#define FN_TRANSFER_ENCODING "Transfer-Encoding"
#define FN_CONTENT_LENGTH "Content-Length"
#define FN_LOCATION "Location"

/*
 * Find a header field by name in a NUL-terminated block of header lines.
 *
 * The name is compared without regard to case and must start a line
 * and be followed by ':' (so "Location" does not match
 * "Content-Location").  Returns a pointer to the first character of
 * the value after any blanks or tabs, or NULL if the field is absent.
 * The value is not terminated at the end of its line.
 */
static char *response_header(char *headers, const char *name)
{
    size_t namelen = strlen(name);
    char *line = headers;

    while (line != NULL && *line != '\0') {
        if (strncasecmp(line, name, namelen) == 0) {
            char *p = line + namelen;

            while (*p == ' ' || *p == '\t')
                p++;
            if (*p == ':') {
                p++;
                /* Skip blanks only, so that an empty value cannot
                   carry the scan into the next header line. */
                while (*p == ' ' || *p == '\t')
                    p++;
                return p;
            }
        }
        line = strchr(line, '\n');
        if (line != NULL)
            line++;
    }
    return NULL;
}

/*
 * Process one buffer of an HTTP response.
 *
 * s            - NUL-terminated buffer holding len bytes just received;
 *                it is modified in place.
 * start        - true for the first buffer of a response, which must
 *                contain the complete header section.
 * showtags,
 * want_display - passed through to display().
 * newloc       - set to NULL, or to the value of the Location header
 *                (pointing into s) when the headers contain one.
 *
 * On the first buffer the headers are examined for Transfer-Encoding,
 * Content-Length and Location.  Body data is handed to display(),
 * following chunk-size lines when the transfer is chunked.  The count
 * of body bytes still expected is kept in static storage between calls.
 *
 * Returns true if more data is expected or a Location header was
 * found, false when the body is complete or the response could not be
 * understood.
 */
static bool response(char *s, int len, bool start, bool showtags,
  bool want_display, char **newloc)
{
    char *body, *fv;
    static bool ischunked;
    static int chunklen;

    *newloc = NULL;
    if (start) {
        ischunked = false;
        chunklen = 0;
        body = strstr(s, "\r\n\r\n");
        if (body == NULL) {
            printf("Invalid header in response:\n");
            printf("%.500s\n", s);
            return false;
        }
        /* Terminate the header section so searches stay inside it. */
        *body = '\0';
        body += 4;

        fv = response_header(s, FN_TRANSFER_ENCODING);
        if (fv != NULL && strncasecmp(fv, "chunked", 7) == 0)
            ischunked = true;

        fv = response_header(s, FN_CONTENT_LENGTH);
        if (fv != NULL)
            chunklen = (int)strtol(fv, NULL, 10);

        fv = response_header(s, FN_LOCATION);
        if (fv != NULL) {
            /* Cut the value at the end of its line; otherwise the
               redirect target would include the following headers. */
            fv[strcspn(fv, "\r\n")] = '\0';
            printf("Location = \"%s\"\n", fv);
            *newloc = fv;
            return true;
        }
        len -= (int)(body - s);
        s = body;
    }

    while (len > 0) {
        int seglen;

        if (ischunked && chunklen <= 0) {
            char *end;

            /* A chunk starts with its size in hex on a line of its own. */
            chunklen = (int)strtol(s, &end, 16);
            /* Count the CRLF that follows the chunk data as part of it. */
            if (chunklen > 0)
                chunklen += 2;
            if (strncmp(end, "\r\n", 2) == 0)
                end += 2;
            len -= (int)(end - s);
            s = end;
        }
        if (chunklen <= 0)
            return false;

        seglen = chunklen;
        if (seglen > len)
            seglen = len;
        display(s, seglen, showtags, want_display);
        len -= seglen;
        s += seglen;
        chunklen -= seglen;
    }
    return chunklen > 0;
}
00392
00393 int main(int argc, char *argv[])
00394 {
00395 SSI *ssip = NULL;
00396 char msg[1600], *newloc;
00397 const char *url = "http://www.gnu.org";
00398 ssize_t nrcv, nrcv_total;
00399 int fd, c, limit = 0;
00400 bool showtags = false, retry = false, want_display = true,
00401 raw_display = false;
00402 char host[500] = "", prevhost[500] = "",
00403 path[MAX_HREF] = "/", newpath[MAX_HREF];
00404
00405 while ((c = getopt(argc, argv, ":arstu:")) != -1)
00406 switch(c) {
00407 case 'a':
00408 limit = 10;
00409 break;
00410 case 's':
00411 want_display = false;
00412 break;
00413 case 'r':
00414 raw_display = true;
00415 want_display = false;
00416 break;
00417 case 't':
00418 showtags = true;
00419 break;
00420 case 'u':
00421 url = optarg;
00422 break;
00423 case ':':
00424 fprintf(stderr, "Option -%c requires an operand\n", optopt);
00425
00426 default:
00427 fprintf(stderr, "Usage: br [-a] [-s] [-t] [-u url]\n");
00428 exit(EXIT_FAILURE);
00429 }
00430 while (true) {
00431 bool start;
00432
00433 if (newloc != NULL) {
00434 printf("newloc = %s\n", newloc);
00435 url = newloc;
00436 newloc = NULL;
00437 retry = false;
00438 }
00439 parse_url(url, host, sizeof(host), newpath, sizeof(newpath));
00440 if (!retry)
00441 makepath(path, sizeof(path), newpath);
00442 if (strcmp(prevhost, host) != 0 || ssip == NULL) {
00443 snprintf(msg, sizeof(msg), "//%s%s", host,
00444 strchr(host, ':') == NULL ? ":80" : "");
00445 printf("Connecting to host \"%s\"\n", msg);
00446 ssip = ssi_open(msg, false);
00447 ec_neg1( fd = ssi_get_server_fd(ssip) )
00448 if (ssip == NULL) {
00449
00450 syserr_print("Can't connect");
00451 if ((url = go_link(limit)) == NULL)
00452 break;
00453 continue;
00454 }
00455 strcpy(prevhost, host);
00456 printf("\t...connected.\n");
00457 }
00458 else
00459 printf("Continuing with exisiting connection\n");
00460 snprintf(msg, sizeof(msg), "GET %s HTTP/1.1\nHost: %s\n\n",
00461 path, host);
00462 printf("Browsing URL \"%s\"\n", path);
00463 nrcv_total = 0;
00464 nanchors = 0;
00465 memset(anchor, 0, sizeof(anchor));
00466 ec_neg1( writeall(fd, msg, strlen(msg)) )
00467 while (true) {
00468 start = nrcv_total == 0;
00469 ec_neg1( nrcv = read(fd, msg, sizeof(msg)) )
00470 if (nrcv == 0)
00471 break;
00472 msg[nrcv] = '\0';
00473 nrcv_total += nrcv;
00474 if (raw_display)
00475 printf("\n\n*** %d bytes in message ***\n%s\n"
00476 "***************************\n", nrcv,
00477 start ? msg : " (message text not shown)");
00478 if (!response(msg, nrcv, start, showtags, want_display,
00479 &newloc) || newloc != NULL)
00480 break;
00481 }
00482 if (newloc != NULL)
00483 continue;
00484 if (nrcv_total == 0 && !retry) {
00485 printf("Connection dropped by host\n");
00486 ec_false( ssi_close(ssip) )
00487 ssip = NULL;
00488 retry = true;
00489 continue;
00490 }
00491 printf("\n%d total bytes received\n", nrcv_total);
00492 retry = false;
00493 if ((url = go_link(limit)) == NULL)
00494 break;
00495 }
00496 if (ssip != NULL)
00497 ec_false( ssi_close(ssip) )
00498 printf("Done.\n");
00499 exit(EXIT_SUCCESS);
00500
00501 EC_CLEANUP_BGN
00502 return EXIT_FAILURE;
00503 EC_CLEANUP_END
00504 }