00001 /* 00002 More-complete Web Browser 00003 AUP2, Sec. 8.04.3 (not in book) 00004 00005 Copyright 2003 by Marc J. Rochkind. All rights reserved. 00006 May be copied only for purposes and under conditions described 00007 on the Web page www.basepath.com/aup/copyright.htm. 00008 00009 The Example Files are provided "as is," without any warranty; 00010 without even the implied warranty of merchantability or fitness 00011 for a particular purpose. The author and his publisher are not 00012 responsible for any damages, direct or incidental, resulting 00013 from the use or non-use of these Example Files. 00014 00015 The Example Files may contain defects, and some contain deliberate 00016 coding mistakes that were included for educational reasons. 00017 You are responsible for determining if and how the Example Files 00018 are to be used. 00019 00020 */ 00021 #include "defs.h" 00022 #include "ssi.h" 00023 00024 /* 00025 http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] 00026 */ 00027 00028 /* use basename? *********************************************************/ 00029 00030 #define MAX_LINK 100 00031 #define MAX_HREF 120 00032 #define MAX_ANCHORS 200 00033 00034 static struct { 00035 char a_link[MAX_LINK]; 00036 char a_href[MAX_HREF]; 00037 } anchor[MAX_ANCHORS]; 00038 static int nanchors; 00039 00040 static void add_href(const char *tag) 00041 { 00042 const char *p, *href = NULL; 00043 char delim = '\0'; 00044 int nhref; 00045 00046 if (nanchors < MAX_ANCHORS) { 00047 anchor[nanchors].a_href[0] = '\0'; 00048 anchor[nanchors].a_link[0] = '\0'; 00049 for (p = tag; *p && *p != delim; p++) 00050 if (href == NULL && strncasecmp(p, "HREF", 4) == 0) { 00051 p += 4; 00052 while (isspace((int)*p)) 00053 p++; 00054 if (*p == '=') { 00055 p++; 00056 while (isspace((int)*p)) 00057 p++; 00058 if (*p == '"' || *p == '\'') 00059 delim = *p; 00060 else 00061 p--; 00062 href = p + 1; 00063 } 00064 } 00065 if (href != NULL) { 00066 nhref = p - href; 00067 if (nhref > MAX_HREF - 1) 00068 nhref = MAX_HREF - 1; 00069 strncpy(anchor[nanchors].a_href, href, nhref); 00070 anchor[nanchors].a_href[nhref] = '\0'; 00071 } 00072 } 00073 } 00074 00075 static void add_link(const char *link) 00076 { 00077 int nlink; 00078 00079 if (nanchors < MAX_ANCHORS && anchor[nanchors].a_href[0] != '\0') { 00080 nlink = strlen(link); 00081 if (nlink > MAX_LINK - 1) 00082 nlink = MAX_LINK - 1; 00083 strncpy(anchor[nanchors].a_link, link, nlink); 00084 anchor[nanchors].a_link[nlink] = '\0'; 00085 nanchors++; 00086 } 00087 } 00088 00089 static const char *go_link(int limit) 00090 { 00091 int i; 00092 long n; 00093 static char buf[MAX_HREF]; 00094 00095 if (limit <= 0 || limit > nanchors) 00096 limit = nanchors; 00097 for (i = 0; i < limit; i++) 00098 printf("%d:\t%-20.20s --> %s\n", i + 1, anchor[i].a_link, 00099 anchor[i].a_href); 00100 if (limit < nanchors) 00101 printf("\t(%d more links not shown)\n", nanchors - limit); 00102 while (true) { 00103 printf("Link number, URL, or Ctrl-D to quit]? "); 00104 if (fgets(buf, sizeof(buf), stdin) == NULL || buf[0] == 'q') 00105 return NULL; 00106 for (i = 0; buf[i]; i++) 00107 if (!isspace((int)buf[i]) && !isdigit((int)buf[i])) { 00108 i = strlen(buf); 00109 if (i > 0 && buf[i - 1] == '\n') 00110 buf[i - 1] = '\0'; 00111 return buf; 00112 } 00113 n = strtol(buf, NULL, 10); 00114 if (n > 0 && n <= limit) 00115 return anchor[n - 1].a_href; 00116 printf("Invalid number\n"); 00117 } 00118 } 00119 00120 static void parse_url(const char *url, char *host, size_t max_host, 00121 char *path, size_t max_path) 00122 { 00123 const char *slash; 00124 int host_len; 00125 bool new_host = false; 00126 00127 if (strncasecmp(url, "http://", 7) == 0) { 00128 url += 7; 00129 new_host = true; 00130 } 00131 if (strncasecmp(url, "https://", 8) == 0) { 00132 url += 8; 00133 new_host = true; 00134 } 00135 if (new_host) { 00136 slash = strchr(url, '/'); 00137 if (slash == NULL) { 00138 strcpy(path, "/"); 00139 slash = &url[strlen(url)]; 00140 } 00141 else { 00142 strncpy(path, slash, max_path); 00143 path[max_path - 1] = '\0'; 00144 } 00145 host_len = slash - url; 00146 if (host_len > max_host - 1) 00147 host_len = max_host - 1; 00148 strncpy(host, url, host_len); 00149 host[host_len] = '\0'; 00150 } 00151 else { 00152 strncpy(path, url, max_path); 00153 path[max_path - 1] = '\0'; 00154 } 00155 } 00156 00157 static void makepath(char *path, size_t max_path, const char *newpath) 00158 { 00159 char *filename; 00160 00161 if (newpath[0] == '/') 00162 strncpy(path, newpath, max_path); 00163 else { 00164 filename = strrchr(path, '/'); 00165 if (filename == NULL) 00166 filename = path; 00167 else 00168 filename++; 00169 /* anything with a dot is a file name to us */ 00170 if (strchr(filename, '.') != NULL) 00171 *filename = '\0'; 00172 if (path[strlen(path) - 1] != '/') 00173 strncat(path, "/", max_path); 00174 strncat(path, newpath, max_path); 00175 } 00176 } 00177 00178 static void display(const char *s, int len, bool showtags, 00179 bool want_display) 00180 { 00181 static enum {DSP_NORMAL, DSP_INTAG, DSP_INENTITY} state = DSP_NORMAL; 00182 static int taglen, ntag = 0, col = 0, nlink = 0; 00183 static char tag[100], link[MAX_LINK]; 00184 static bool inlink = false, inscript = false; 00185 /* substates of DSP_NORMAL */ 00186 int i, c; 00187 00188 for (i = 0; i < len; i++) { 00189 c = s[i] & 0177; 00190 00191 if (c == '\r') { 00192 #if 0 00193 if (want_display) 00194 putc('@', stdout); 00195 #endif 00196 continue; 00197 } 00198 #if 0 00199 if (c == '\n') { 00200 if (want_display) 00201 putc('#', stdout); 00202 } 00203 #endif 00204 switch (state) { 00205 case DSP_NORMAL: 00206 if (c == '<') { 00207 state = DSP_INTAG; 00208 break; 00209 } 00210 if (inscript) 00211 break; 00212 switch (c) { 00213 case '\n': 00214 if (col > 0) { 00215 if (want_display) 00216 putc('\n', stdout); 00217 col = 0; 00218 } 00219 break; 00220 case '&': 00221 state = DSP_INENTITY; 00222 break; 00223 default: 00224 if (inlink && nlink < sizeof(link) - 1) 00225 link[nlink++] = (char)c; 00226 if (col >= 80) { 00227 if (want_display) 00228 putc('\n', stdout); 00229 col = 0; 00230 } 00231 if (col > 0 || !isspace(c)) { 00232 col++; 00233 if (want_display) 00234 putc(c, stdout); 00235 } 00236 } 00237 break; 00238 case DSP_INTAG: 00239 if (c == '>') { 00240 char tagname[20]; 00241 00242 tag[ntag] = '\0'; 00243 if (showtags) 00244 printf("<%s>\n", tag); 00245 ntag = 0; 00246 state = DSP_NORMAL; 00247 taglen = strcspn(tag, " \t\n"); 00248 if (taglen > sizeof(tagname) - 1) 00249 taglen = sizeof(tagname) - 1; 00250 strncpy(tagname, tag, taglen); 00251 tagname[taglen] = '\0'; 00252 if (strcasecmp(tagname, "P") == 0) { 00253 if (want_display) 00254 fputs("\n\n", stdout); 00255 col = 0; 00256 } 00257 else if (strcasecmp(tagname, "BR") == 0) { 00258 if (want_display) 00259 fputs("\n", stdout); 00260 col = 0; 00261 } 00262 else if (strcasecmp(tagname, "A") == 0) { 00263 inlink = true; 00264 nlink = 0; 00265 add_href(tag); 00266 } 00267 else if (inlink && strcasecmp(tagname, "/A") == 0) { 00268 link[nlink] = '\0'; 00269 add_link(link); 00270 inlink = false; 00271 } 00272 else if (strcasecmp(tagname, "SCRIPT") == 0) 00273 inscript = true; 00274 else if (inscript && strcasecmp(tagname, "/SCRIPT") == 0) 00275 inscript = false; 00276 } 00277 else if ((!isspace(c) || ntag > 0) && ntag < sizeof(tag) - 1) 00278 tag[ntag++] = c; 00279 break; 00280 case DSP_INENTITY: 00281 if (c == ';') { 00282 state = DSP_NORMAL; 00283 if (want_display) 00284 putc('?', stdout); 00285 } 00286 break; 00287 default: 00288 fprintf(stderr, __FILE__ ":" __func__ " -- bad state\n"); 00289 exit(EXIT_FAILURE); 00290 } 00291 } 00292 } 00293 00294 #define FN_TRANSFER_ENCODING "Transfer-Encoding" 00295 #define FN_CONTENT_LENGTH "Content-Length" 00296 #define FN_LOCATION "Location" 00297 00298 /* 00299 Assumes whole header and chunk lengths are contiguous in a single call. 00300 */ 00301 static bool response(char *s, int len, bool start, bool showtags, 00302 bool want_display, char **newloc) 00303 { 00304 char *body, *fn, *fv, *colon; 00305 static bool ischunked; 00306 static int chunklen; 00307 00308 /*printf("\n\n****response start = %d; len = %d\n", start, len);*/ 00309 *newloc = NULL; 00310 if (start) { 00311 ischunked = false; 00312 chunklen = 0; 00313 body = strstr(s, "\r\n\r\n"); 00314 if (body == NULL) { 00315 printf("Invalid header in response:\n"); 00316 printf("%.500s\n", s); 00317 return false; 00318 } 00319 *body = '\0'; 00320 body += 4; 00321 if ((fn = strstr(s, FN_TRANSFER_ENCODING)) != NULL) { 00322 if ((colon = strchr(fn, ':')) != NULL) { 00323 fv = colon + 1; 00324 while (isspace((int)*fv)) 00325 fv++; 00326 if (strncasecmp(fv, "chunked", 7) == 0) 00327 ischunked = true; 00328 } 00329 } 00330 if ((fn = strstr(s, FN_CONTENT_LENGTH)) != NULL) { 00331 if ((colon = strchr(fn, ':')) != NULL) { 00332 fv = colon + 1; 00333 while (isspace((int)*fv)) 00334 fv++; 00335 chunklen = (int)strtol(fv, NULL, 10); 00336 } 00337 } 00338 if ((fn = strstr(s, FN_LOCATION)) != NULL) { 00339 if ((colon = strchr(fn, ':')) != NULL) { 00340 fv = colon + 1; 00341 while (isspace((int)*fv)) 00342 fv++; 00343 printf("Location = \"%s\"\n", fv); 00344 *newloc = fv; 00345 return true; 00346 } 00347 } 00348 len -= body - s; 00349 /*printf("header size = %d; revised len = %d\n", body - s, len);*/ 00350 s = body; 00351 /*printf("chunked = %d; chunklen = %d\n", ischunked, chunklen);*/ 00352 } 00353 while (len > 0) { 00354 int seglen; 00355 00356 if (ischunked && chunklen <= 0) { 00357 char *end; 00358 00359 chunklen = strtol(s, &end, 16); 00360 /*printf("Got chunked len = %d from [%.5s] end = %c\n", chunklen, s, *end);*/ 00361 if (chunklen > 0) 00362 chunklen += 2; /* CRLF at end of data not counted yet */ 00363 if (strncmp(end, "\r\n", 2) == 0) 00364 end += 2; 00365 len -= end - s; 00366 s = end; 00367 } 00368 if (chunklen <= 0) { 00369 /*printf("discarded len = %d\n", len);*/ 00370 return false; 00371 } 00372 seglen = chunklen; 00373 if (seglen > len) 00374 seglen = len; 00375 #if 0 00376 if (seglen < 40) { 00377 char buf[50]; 00378 strncpy(buf, s, seglen); 00379 buf[seglen] = '\0'; 00380 printf("display(%s, %d)\n", buf, seglen); 00381 } 00382 else 00383 printf("display(%.15s ... %.15s, %d)\n", s, &s[seglen - 15], seglen); 00384 #endif 00385 display(s, seglen, showtags, want_display); 00386 len -= seglen; 00387 s += seglen; 00388 chunklen -= seglen; 00389 } 00390 return chunklen > 0; 00391 } 00392 00393 int main(int argc, char *argv[]) 00394 { 00395 SSI *ssip = NULL; 00396 char msg[1600], *newloc; 00397 const char *url = "http://www.gnu.org"; 00398 ssize_t nrcv, nrcv_total; 00399 int fd, c, limit = 0; 00400 bool showtags = false, retry = false, want_display = true, 00401 raw_display = false; 00402 char host[500] = "", prevhost[500] = "", 00403 path[MAX_HREF] = "/", newpath[MAX_HREF]; 00404 00405 while ((c = getopt(argc, argv, ":arstu:")) != -1) 00406 switch(c) { 00407 case 'a': 00408 limit = 10; 00409 break; 00410 case 's': 00411 want_display = false; 00412 break; 00413 case 'r': 00414 raw_display = true; 00415 want_display = false; 00416 break; 00417 case 't': 00418 showtags = true; 00419 break; 00420 case 'u': 00421 url = optarg; 00422 break; 00423 case ':': 00424 fprintf(stderr, "Option -%c requires an operand\n", optopt); 00425 /* fall through */ 00426 default: 00427 fprintf(stderr, "Usage: br [-a] [-s] [-t] [-u url]\n"); 00428 exit(EXIT_FAILURE); 00429 } 00430 while (true) { 00431 bool start; 00432 00433 if (newloc != NULL) { 00434 printf("newloc = %s\n", newloc); 00435 url = newloc; 00436 newloc = NULL; 00437 retry = false; 00438 } 00439 parse_url(url, host, sizeof(host), newpath, sizeof(newpath)); 00440 if (!retry) 00441 makepath(path, sizeof(path), newpath); 00442 if (strcmp(prevhost, host) != 0 || ssip == NULL) { 00443 snprintf(msg, sizeof(msg), "//%s%s", host, 00444 strchr(host, ':') == NULL ? ":80" : ""); 00445 printf("Connecting to host \"%s\"\n", msg); 00446 ssip = ssi_open(msg, false); 00447 ec_neg1( fd = ssi_get_server_fd(ssip) ) 00448 if (ssip == NULL) { 00449 /* need to document the following fcn */ 00450 syserr_print("Can't connect"); 00451 if ((url = go_link(limit)) == NULL) 00452 break; 00453 continue; 00454 } 00455 strcpy(prevhost, host); 00456 printf("\t...connected.\n"); 00457 } 00458 else 00459 printf("Continuing with exisiting connection\n"); 00460 snprintf(msg, sizeof(msg), "GET %s HTTP/1.1\nHost: %s\n\n", 00461 path, host); 00462 printf("Browsing URL \"%s\"\n", path); 00463 nrcv_total = 0; 00464 nanchors = 0; 00465 memset(anchor, 0, sizeof(anchor)); 00466 ec_neg1( writeall(fd, msg, strlen(msg)) ) 00467 while (true) { 00468 start = nrcv_total == 0; 00469 ec_neg1( nrcv = read(fd, msg, sizeof(msg)) ) 00470 if (nrcv == 0) 00471 break; 00472 msg[nrcv] = '\0'; 00473 nrcv_total += nrcv; 00474 if (raw_display) 00475 printf("\n\n*** %d bytes in message ***\n%s\n" 00476 "***************************\n", nrcv, 00477 start ? msg : " (message text not shown)"); 00478 if (!response(msg, nrcv, start, showtags, want_display, 00479 &newloc) || newloc != NULL) 00480 break; 00481 } 00482 if (newloc != NULL) 00483 continue; 00484 if (nrcv_total == 0 && !retry) { 00485 printf("Connection dropped by host\n"); 00486 ec_false( ssi_close(ssip) ) 00487 ssip = NULL; 00488 retry = true; 00489 continue; 00490 } 00491 printf("\n%d total bytes received\n", nrcv_total); 00492 retry = false; 00493 if ((url = go_link(limit)) == NULL) 00494 break; 00495 } 00496 if (ssip != NULL) 00497 ec_false( ssi_close(ssip) ) 00498 printf("Done.\n"); 00499 exit(EXIT_SUCCESS); 00500 00501 EC_CLEANUP_BGN 00502 return EXIT_FAILURE; 00503 EC_CLEANUP_END 00504 }