©2004 by Marc J. Rochkind. All rights reserved. Portions marked "Open Source" may be copied under license.

 

Main Page   Modules   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members  

c8/br.c

Go to the documentation of this file.
00001 /*
00002     More-complete Web Browser
00003     AUP2, Sec. 8.04.3 (not in book)
00004 
00005     Copyright 2003 by Marc J. Rochkind. All rights reserved.
00006     May be copied only for purposes and under conditions described
00007     on the Web page www.basepath.com/aup/copyright.htm.
00008 
00009     The Example Files are provided "as is," without any warranty;
00010     without even the implied warranty of merchantability or fitness
00011     for a particular purpose. The author and his publisher are not
00012     responsible for any damages, direct or incidental, resulting
00013     from the use or non-use of these Example Files.
00014 
00015     The Example Files may contain defects, and some contain deliberate
00016     coding mistakes that were included for educational reasons.
00017     You are responsible for determining if and how the Example Files
00018     are to be used.
00019 
00020 */
00021 #include "defs.h"
00022 #include "ssi.h"
00023 
00024 /*
00025     http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]]
00026 */
00027 
00028 /* use basename? *********************************************************/
00029 
/* Capacity limits for the table of hyperlinks collected from one page. */
#define MAX_LINK    100     /* bytes reserved for a link's visible text */
#define MAX_HREF    120     /* bytes reserved for a link's HREF target */
#define MAX_ANCHORS 200     /* number of links remembered per page */

/* One hyperlink: the text shown to the user and the target it points to. */
struct anchor_entry {
    char a_link[MAX_LINK];
    char a_href[MAX_HREF];
};

/* Links found so far; entries [0, nanchors) are complete. */
static struct anchor_entry anchor[MAX_ANCHORS];
static int nanchors = 0;
00039 
00040 static void add_href(const char *tag)
00041 {
00042     const char *p, *href = NULL;
00043     char delim = '\0';
00044     int nhref;
00045 
00046     if (nanchors < MAX_ANCHORS) {
00047         anchor[nanchors].a_href[0] = '\0';
00048         anchor[nanchors].a_link[0] = '\0';
00049         for (p = tag; *p && *p != delim; p++)
00050             if (href == NULL && strncasecmp(p, "HREF", 4) == 0) {
00051                 p += 4;
00052                 while (isspace((int)*p))
00053                     p++;
00054                 if (*p == '=') {
00055                     p++;
00056                     while (isspace((int)*p))
00057                         p++;
00058                     if (*p == '"' || *p == '\'')
00059                         delim = *p;
00060                     else
00061                         p--;
00062                     href = p + 1;
00063                 }
00064             }
00065         if (href != NULL) {
00066             nhref = p - href;
00067             if (nhref > MAX_HREF - 1)
00068                 nhref = MAX_HREF - 1;
00069             strncpy(anchor[nanchors].a_href, href, nhref);
00070             anchor[nanchors].a_href[nhref] = '\0';
00071         }
00072     }
00073 }
00074 
00075 static void add_link(const char *link)
00076 {
00077     int nlink;
00078 
00079     if (nanchors < MAX_ANCHORS && anchor[nanchors].a_href[0] != '\0') {
00080         nlink = strlen(link);
00081         if (nlink > MAX_LINK - 1)
00082             nlink = MAX_LINK - 1;
00083         strncpy(anchor[nanchors].a_link, link, nlink);
00084         anchor[nanchors].a_link[nlink] = '\0';
00085         nanchors++;
00086     }
00087 }
00088 
00089 static const char *go_link(int limit)
00090 {
00091     int i;
00092     long n;
00093     static char buf[MAX_HREF];
00094 
00095     if (limit <= 0 || limit > nanchors)
00096         limit = nanchors;
00097     for (i = 0; i < limit; i++)
00098         printf("%d:\t%-20.20s --> %s\n", i + 1, anchor[i].a_link,
00099           anchor[i].a_href);
00100     if (limit < nanchors)
00101         printf("\t(%d more links not shown)\n", nanchors - limit);
00102     while (true) {
00103         printf("Link number, URL, or Ctrl-D to quit]? ");
00104         if (fgets(buf, sizeof(buf), stdin) == NULL || buf[0] == 'q')
00105             return NULL;
00106         for (i = 0; buf[i]; i++)
00107             if (!isspace((int)buf[i]) && !isdigit((int)buf[i])) {
00108                 i = strlen(buf);
00109                 if (i > 0 && buf[i - 1] == '\n')
00110                     buf[i - 1] = '\0';
00111                 return buf;
00112             }
00113         n = strtol(buf, NULL, 10);
00114         if (n > 0 && n <= limit)
00115             return anchor[n - 1].a_href;
00116         printf("Invalid number\n");
00117     }
00118 }
00119 
/*
    Split a URL into host and path.

    If url starts with "http://" or "https://" (any letter case), the
    text up to the first '/' after the scheme is copied to host (so it
    includes any ":port") and the remainder, starting at that '/', is
    copied to path; path becomes "/" when there is no '/'.

    Otherwise url is taken to be relative: it is copied to path and host
    is left untouched, so the caller's previous host stays in effect.

    max_host and max_path are the sizes of the host and path buffers.
    Results are truncated to fit and are always NUL-terminated; a buffer
    of size 0 is never written.
*/
static void parse_url(const char *url, char *host, size_t max_host,
  char *path, size_t max_path)
{
    const char *rest = NULL; /* text after the scheme; NULL if relative */
    const char *slash;
    size_t host_len;

    /* only one scheme prefix is ever removed */
    if (strncasecmp(url, "http://", 7) == 0)
        rest = url + 7;
    else if (strncasecmp(url, "https://", 8) == 0)
        rest = url + 8;

    if (rest == NULL) {
        if (max_path > 0)
            snprintf(path, max_path, "%s", url);
        return;
    }

    slash = strchr(rest, '/');
    if (slash == NULL) {
        slash = rest + strlen(rest);
        if (max_path > 0)
            snprintf(path, max_path, "%s", "/");
    }
    else if (max_path > 0)
        snprintf(path, max_path, "%s", slash);

    if (max_host > 0) {
        host_len = (size_t)(slash - rest);
        if (host_len > max_host - 1)
            host_len = max_host - 1;
        memcpy(host, rest, host_len);
        host[host_len] = '\0';
    }
}
00156 
/*
    Update the current path, in place, to reflect a newly requested path.

    path     current path; replaced by the result
    max_path size of the path buffer, including the terminating NUL
    newpath  requested path; must not overlap path

    If newpath starts with '/', it replaces path outright. Otherwise it is
    taken relative to path: a last component of path that contains a dot
    is considered a file name and is dropped, a '/' separator is added if
    path does not already end with one (an empty path becomes "/"), and
    newpath is appended. "." and ".." components are not interpreted.

    The result is truncated to fit and is always NUL-terminated; nothing
    is written if max_path is 0.
*/
static void makepath(char *path, size_t max_path, const char *newpath)
{
    char *filename;
    size_t len;

    if (max_path == 0)
        return;
    if (newpath[0] == '/') {
        snprintf(path, max_path, "%s", newpath);
        return;
    }

    filename = strrchr(path, '/');
    filename = filename == NULL ? path : filename + 1;
    /* anything with a dot is a file name to us */
    if (strchr(filename, '.') != NULL)
        *filename = '\0';

    len = strlen(path);
    if ((len == 0 || path[len - 1] != '/') && len < max_path - 1) {
        path[len++] = '/';
        path[len] = '\0';
    }
    /* append within the space that is actually left in the buffer */
    if (len < max_path)
        snprintf(path + len, max_path - len, "%s", newpath);
}
00177 
00178 static void display(const char *s, int len, bool showtags,
00179   bool want_display)
00180 {
00181     static enum {DSP_NORMAL, DSP_INTAG, DSP_INENTITY} state = DSP_NORMAL;
00182     static int taglen, ntag = 0, col = 0, nlink = 0;
00183     static char tag[100], link[MAX_LINK];
00184     static bool inlink = false, inscript = false;
00185       /* substates of DSP_NORMAL */
00186     int i, c;
00187 
00188     for (i = 0; i < len; i++) {
00189         c = s[i] & 0177;
00190 
00191         if (c == '\r') {
00192 #if 0
00193             if (want_display)
00194                 putc('@', stdout);
00195 #endif
00196             continue;
00197         }
00198 #if 0
00199         if (c == '\n') {
00200             if (want_display)
00201                 putc('#', stdout);
00202         }
00203 #endif
00204         switch (state) {
00205         case DSP_NORMAL:
00206             if (c == '<') {
00207                 state = DSP_INTAG;
00208                 break;
00209             }
00210             if (inscript)
00211                 break;
00212             switch (c) {
00213             case '\n':
00214                 if (col > 0) {
00215                     if (want_display)
00216                         putc('\n', stdout);
00217                     col = 0;
00218                 }
00219                 break;
00220             case '&':
00221                 state = DSP_INENTITY;
00222                 break;
00223             default:
00224                 if (inlink && nlink < sizeof(link) - 1)
00225                     link[nlink++] = (char)c;
00226                 if (col >= 80) {
00227                     if (want_display)
00228                         putc('\n', stdout);
00229                     col = 0;
00230                 }
00231                 if (col > 0 || !isspace(c)) {
00232                     col++;
00233                     if (want_display)
00234                         putc(c, stdout);
00235                 }
00236             }
00237             break;
00238         case DSP_INTAG:
00239             if (c == '>') {
00240                 char tagname[20];
00241 
00242                 tag[ntag] = '\0';
00243                 if (showtags)
00244                     printf("<%s>\n", tag);
00245                 ntag = 0;
00246                 state = DSP_NORMAL;
00247                 taglen = strcspn(tag, " \t\n");
00248                 if (taglen > sizeof(tagname) - 1)
00249                     taglen = sizeof(tagname) - 1;
00250                 strncpy(tagname, tag, taglen);
00251                 tagname[taglen] = '\0';
00252                 if (strcasecmp(tagname, "P") == 0) {
00253                     if (want_display)
00254                         fputs("\n\n", stdout);
00255                     col = 0;
00256                 }
00257                 else if (strcasecmp(tagname, "BR") == 0) {
00258                     if (want_display)
00259                         fputs("\n", stdout);
00260                     col = 0;
00261                 }
00262                 else if (strcasecmp(tagname, "A") == 0) {
00263                     inlink = true;
00264                     nlink = 0;
00265                     add_href(tag);
00266                 }
00267                 else if (inlink && strcasecmp(tagname, "/A") == 0) {
00268                     link[nlink] = '\0';
00269                     add_link(link);
00270                     inlink = false;
00271                 }
00272                 else if (strcasecmp(tagname, "SCRIPT") == 0)
00273                     inscript = true;
00274                 else if (inscript && strcasecmp(tagname, "/SCRIPT") == 0)
00275                     inscript = false;
00276             }
00277             else if ((!isspace(c) || ntag > 0) && ntag < sizeof(tag) - 1)
00278                 tag[ntag++] = c;
00279             break;
00280         case DSP_INENTITY:
00281             if (c == ';') {
00282                 state = DSP_NORMAL;
00283                 if (want_display)
00284                     putc('?', stdout);
00285             }
00286             break;
00287         default:
00288             fprintf(stderr, __FILE__ ":" __func__ " -- bad state\n");
00289             exit(EXIT_FAILURE);
00290         }
00291     }
00292 }
00293 
#define FN_TRANSFER_ENCODING    "Transfer-Encoding"
#define FN_CONTENT_LENGTH       "Content-Length"
#define FN_LOCATION             "Location"

/*
    Find header field `name` in a NUL-terminated block of header lines.

    The name must appear at the start of a line and is compared without
    regard to letter case. Returns a pointer to the field value (the text
    after the colon, with leading spaces and tabs skipped), or NULL if the
    field is absent. The value is not terminated at the end of its line.
*/
static char *header_value(char *hdrs, const char *name)
{
    size_t namelen = strlen(name);
    char *line = hdrs;

    while (line != NULL && *line != '\0') {
        if (strncasecmp(line, name, namelen) == 0) {
            char *p = line + namelen;

            while (*p == ' ' || *p == '\t')
                p++;
            if (*p == ':') {
                p++;
                while (*p == ' ' || *p == '\t')
                    p++;
                return p;
            }
        }
        line = strchr(line, '\n');
        if (line != NULL)
            line++;
    }
    return NULL;
}

/*
    Process one buffer of an HTTP response.

    s, len        the received data; s must be NUL-terminated and is
                  modified in place
    start         true for the first buffer of a response, which must
                  begin with the header
    showtags, want_display
                  passed through to display()
    newloc        set to NULL, or to the value of the Location field if
                  the header has one (the pointer refers into s)

    Returns true if more data is expected for this response (or a
    Location was found), false if the response is complete, has no
    usable length information, or has an invalid header.

    Assumes whole header and chunk lengths are contiguous in a single call.
    The expected body length comes from Content-Length, or from the chunk
    sizes when Transfer-Encoding is chunked; it is tracked across calls in
    static variables.
*/
static bool response(char *s, int len, bool start, bool showtags,
  bool want_display, char **newloc)
{
    char *body, *fv;
    static bool ischunked;
    static int chunklen;

    *newloc = NULL;
    if (start) {
        ischunked = false;
        chunklen = 0;
        body = strstr(s, "\r\n\r\n");
        if (body == NULL) {
            printf("Invalid header in response:\n");
            printf("%.500s\n", s);
            return false;
        }
        /* terminate the header so the searches below stay inside it */
        *body = '\0';
        body += 4;
        if ((fv = header_value(s, FN_TRANSFER_ENCODING)) != NULL &&
          strncasecmp(fv, "chunked", 7) == 0)
            ischunked = true;
        if ((fv = header_value(s, FN_CONTENT_LENGTH)) != NULL)
            chunklen = (int)strtol(fv, NULL, 10);
        if ((fv = header_value(s, FN_LOCATION)) != NULL) {
            /* the value ends at the end of its own header line */
            fv[strcspn(fv, "\r\n")] = '\0';
            printf("Location = \"%s\"\n", fv);
            *newloc = fv;
            return true;
        }
        len -= (int)(body - s);
        s = body;
    }
    while (len > 0) {
        int seglen;

        if (ischunked && chunklen <= 0) {
            char *end;

            /* at a chunk boundary: read the hexadecimal chunk size */
            chunklen = (int)strtol(s, &end, 16);
            if (chunklen > 0)
                chunklen += 2; /* CRLF at end of data not counted yet */
            if (strncmp(end, "\r\n", 2) == 0)
                end += 2;
            len -= (int)(end - s);
            s = end;
        }
        if (chunklen <= 0)
            return false; /* nothing more expected; discard the rest */
        seglen = chunklen;
        if (seglen > len)
            seglen = len;
        display(s, seglen, showtags, want_display);
        len -= seglen;
        s += seglen;
        chunklen -= seglen;
    }
    return chunklen > 0;
}
00392 
00393 int main(int argc, char *argv[])
00394 {
00395     SSI *ssip = NULL;
00396     char msg[1600], *newloc;
00397     const char *url = "http://www.gnu.org";
00398     ssize_t nrcv, nrcv_total;
00399     int fd, c, limit = 0;
00400     bool showtags = false, retry = false, want_display = true,
00401       raw_display = false;
00402     char host[500] = "", prevhost[500] = "",
00403       path[MAX_HREF] = "/", newpath[MAX_HREF];
00404 
00405     while ((c = getopt(argc, argv, ":arstu:")) != -1)
00406         switch(c) {
00407         case 'a':
00408             limit = 10;
00409             break;
00410         case 's':
00411             want_display = false;
00412             break;
00413         case 'r':
00414             raw_display = true;
00415             want_display = false;
00416             break;
00417         case 't':
00418             showtags = true;
00419             break;
00420         case 'u':
00421             url = optarg;
00422             break;
00423         case ':':
00424             fprintf(stderr, "Option -%c requires an operand\n", optopt);
00425             /* fall through */
00426         default:
00427             fprintf(stderr, "Usage: br [-a] [-s] [-t] [-u url]\n");
00428             exit(EXIT_FAILURE);
00429         }
00430     while (true) {
00431         bool start;
00432 
00433         if (newloc != NULL) {
00434 printf("newloc = %s\n", newloc);
00435             url = newloc;
00436             newloc = NULL;
00437             retry = false;
00438         }
00439         parse_url(url, host, sizeof(host), newpath, sizeof(newpath));
00440         if (!retry)
00441             makepath(path, sizeof(path), newpath);
00442         if (strcmp(prevhost, host) != 0 || ssip == NULL) {
00443             snprintf(msg, sizeof(msg), "//%s%s", host,
00444               strchr(host, ':') == NULL ? ":80" : "");
00445             printf("Connecting to host \"%s\"\n", msg);
00446             ssip = ssi_open(msg, false);
00447             ec_neg1( fd = ssi_get_server_fd(ssip) )
00448             if (ssip == NULL) {
00449                 /* need to document the following fcn */
00450                 syserr_print("Can't connect");
00451                 if ((url = go_link(limit)) == NULL)
00452                     break;
00453                 continue;
00454             }
00455             strcpy(prevhost, host);
00456             printf("\t...connected.\n");
00457         }
00458         else
00459             printf("Continuing with exisiting connection\n");
00460         snprintf(msg, sizeof(msg), "GET %s HTTP/1.1\nHost: %s\n\n",
00461           path, host);
00462         printf("Browsing URL \"%s\"\n", path);
00463         nrcv_total = 0;
00464         nanchors = 0;
00465         memset(anchor, 0, sizeof(anchor));
00466         ec_neg1( writeall(fd, msg, strlen(msg)) )
00467         while (true) {
00468             start = nrcv_total == 0;
00469             ec_neg1(  nrcv = read(fd, msg, sizeof(msg)) )
00470             if (nrcv == 0)
00471                 break;
00472             msg[nrcv] = '\0';
00473             nrcv_total += nrcv;
00474             if (raw_display)
00475                 printf("\n\n*** %d bytes in message ***\n%s\n"
00476                   "***************************\n", nrcv,
00477                   start ? msg : "   (message text not shown)");
00478             if (!response(msg, nrcv, start, showtags, want_display,
00479               &newloc) || newloc != NULL)
00480                 break;
00481         }
00482         if (newloc != NULL)
00483             continue;
00484         if (nrcv_total == 0 && !retry) {
00485             printf("Connection dropped by host\n");
00486             ec_false( ssi_close(ssip) )
00487             ssip = NULL;
00488             retry = true;
00489             continue;
00490         }
00491         printf("\n%d total bytes received\n", nrcv_total);
00492         retry = false;
00493         if ((url = go_link(limit)) == NULL)
00494             break;
00495     }
00496     if (ssip != NULL)
00497         ec_false( ssi_close(ssip) )
00498     printf("Done.\n");
00499     exit(EXIT_SUCCESS);
00500 
00501 EC_CLEANUP_BGN
00502     return EXIT_FAILURE;
00503 EC_CLEANUP_END
00504 }

Generated on Fri Apr 23 10:57:01 2004 for AUP2 Example Source by doxygen 1.3.1