Home > Articles > Programming > C#

  • Print
  • + Share This
Like this article? We recommend

Like this article? We recommend

Putting It All Together

Remember our class skeleton, which heretofore did nothing but get input (see Listing 69)?

Listing 69—Class Skeleton Revisited

using System;

/// <summary>
/// Skeleton of the vampire bot: it only collects the starting URL and the
/// destination folder from the console and stores them.
/// </summary>
class vampirebot
{
    // base_url: the starting URL cut off after its last '/'.
    // folder:   the destination folder exactly as the user typed it.
    string base_url, folder;

    /// <summary>
    /// Remembers the destination folder and derives the base URL from the
    /// starting URL.
    /// </summary>
    /// <param name="url">Starting URL entered by the user.</param>
    /// <param name="dir">Destination folder entered by the user.</param>
    vampirebot(string url, string dir)
    {
        // Everything up to and including the final '/' is the base URL.
        // For an address with no page part (e.g. "http://host") the final
        // '/' is the one in "http://", so the base URL becomes "http://".
        int cut = url.LastIndexOf("/") + 1;

        folder = dir;
        base_url = url.Substring(0, cut);
    }

    /// <summary>
    /// Prompts for the starting URL and the destination folder, then
    /// constructs the bot. Nothing is downloaded yet.
    /// </summary>
    public static void Main()
    {
        Console.Write("Enter starting URL: ");
        string startUrl = Console.ReadLine();

        Console.Write("Destination folder? ");
        string targetDir = Console.ReadLine();

        vampirebot vbot = new vampirebot(startUrl, targetDir);
    }
}

To complete the vampire bot, add all the namespaces and methods used (see Listing 70):

Listing 70—Class Skeleton with Methods Added

using System;
using System.Net;
using System.IO;
using System.Collections;

/// <summary>
/// A minimal "vampire bot": it can fetch a web page, list the quoted
/// references on it that contain ".gif", and save those images into a
/// local folder. In this listing Main() only constructs the bot.
/// </summary>
class vampirebot
{
    // base_url: the starting URL cut off after its last '/'; image
    //           references are appended to it when downloading.
    // folder:   the destination folder exactly as the user typed it.
    string base_url, folder;

    /// <summary>
    /// Remembers the destination folder and derives the base URL from the
    /// starting URL.
    /// </summary>
    /// <param name="url">Starting URL entered by the user.</param>
    /// <param name="dir">Destination folder entered by the user.</param>
    vampirebot(string url, string dir)
    {
        int slash_loc;

        // Everything up to and including the final '/' is the base URL.
        // For an address with no page part (e.g. "http://host") the final
        // '/' is the one in "http://", so the base URL becomes "http://".
        slash_loc = url.LastIndexOf("/");
        base_url = url.Substring(0, slash_loc + 1);
        folder = dir;
    }

    /// <summary>
    /// Downloads the resource at <paramref name="URL"/> and returns its
    /// bytes as a string, one character per byte (no charset decoding).
    /// </summary>
    /// <param name="URL">Address of the page to fetch.</param>
    /// <returns>The raw page contents.</returns>
    public string URLtoRawHTML(string URL)
    {
        WebRequest req = WebRequest.Create(URL);

        // A StringBuilder and a block buffer avoid the quadratic cost of
        // appending to a string one byte at a time.
        System.Text.StringBuilder html = new System.Text.StringBuilder();
        byte[] buffer = new byte[4096];
        int count;

        // using-blocks close the response and stream even if reading fails.
        using (WebResponse res = req.GetResponse())
        using (Stream str = res.GetResponseStream())
        {
            while ((count = str.Read(buffer, 0, buffer.Length)) > 0)
            {
                for (int i = 0; i < count; i++)
                {
                    html.Append((char)buffer[i]);
                }
            }
        }

        return html.ToString();
    }

    /// <summary>
    /// Scans the page text for ".gif" and, for each occurrence, collects
    /// the text between the nearest double quote before it and the next
    /// double quote after it.
    /// </summary>
    /// <param name="raw_html">Page text to scan.</param>
    /// <returns>
    /// An ArrayList of strings, in page order. Duplicates are kept.
    /// </returns>
    public ArrayList RawHTMLtoImageList(string raw_html)
    {
        const string patt = ".gif";
        const string quote = "\"";

        int ploc, sloc, eloc, close;
        ArrayList list = new ArrayList();

        ploc = raw_html.IndexOf(patt, 0);
        while (ploc >= 0)
        {
            // First character after the opening quote (0 if there is none).
            sloc = raw_html.LastIndexOf(quote, ploc) + 1;

            close = raw_html.IndexOf(quote, sloc);
            if (close < 0)
            {
                // No closing quote anywhere after this match, so neither
                // this reference nor any later one can be delimited.
                // Stop instead of calling Substring with a negative length.
                break;
            }

            // Last character before the closing quote.
            eloc = close - 1;
            list.Add(raw_html.Substring(sloc, eloc - sloc + 1));

            // Resume at the end of this reference; the match just handled
            // starts before eloc and therefore cannot be found again.
            ploc = raw_html.IndexOf(patt, eloc);
        }

        return list;
    }

    /// <summary>
    /// Downloads every entry of <paramref name="file_list"/> from
    /// base_url + entry and writes it to folder + "/" + entry, with each
    /// '/' in the entry replaced by '_'.
    /// </summary>
    /// <param name="file_list">Image references relative to base_url.</param>
    public void ImageListtoFiles(ArrayList file_list)
    {
        byte[] buffer = new byte[4096];
        int count;

        for (int i = 0; i < file_list.Count; i++)
        {
            string image = Convert.ToString(file_list[i]);

            // Flatten any sub-directory in the reference into the file name.
            string filename = folder + "/" + image.Replace("/", "_");

            WebRequest req = WebRequest.Create(base_url + image);

            // The file is opened only after the request has succeeded, so a
            // failed download does not leave an empty file behind. The
            // using-blocks release all three resources on any exit path.
            using (WebResponse res = req.GetResponse())
            using (Stream str = res.GetResponseStream())
            using (FileStream fs = new FileStream(filename, FileMode.Create))
            {
                while ((count = str.Read(buffer, 0, buffer.Length)) > 0)
                {
                    fs.Write(buffer, 0, count);
                }
            }
        }
    }

    /// <summary>
    /// Prompts for the starting URL and the destination folder, then
    /// constructs the bot. The methods are not called yet.
    /// </summary>
    public static void Main()
    {
        string url, dir;
        vampirebot vbot;

        Console.Write("Enter starting URL: ");
        url = Console.ReadLine();
        Console.Write("Destination folder? ");
        dir = Console.ReadLine();

        vbot = new vampirebot(url, dir);
    }
}

With the methods added, simply call them in Main() (see Listing 71):

Listing 71—Class Skeleton with Methods Added and Calls to Those Methods

using System;
using System.Net;
using System.IO;
using System.Collections;

/// <summary>
/// A minimal "vampire bot": it fetches one web page, lists the quoted
/// references on it that contain ".gif", and saves those images into a
/// local folder.
/// </summary>
class vampirebot
{
    // base_url: the starting URL cut off after its last '/'; image
    //           references are appended to it when downloading.
    // folder:   the destination folder exactly as the user typed it.
    string base_url, folder;

    /// <summary>
    /// Remembers the destination folder and derives the base URL from the
    /// starting URL.
    /// </summary>
    /// <param name="url">Starting URL entered by the user.</param>
    /// <param name="dir">Destination folder entered by the user.</param>
    vampirebot(string url, string dir)
    {
        int slash_loc;

        // Everything up to and including the final '/' is the base URL.
        // For an address with no page part (e.g. "http://host") the final
        // '/' is the one in "http://", so the base URL becomes "http://".
        slash_loc = url.LastIndexOf("/");
        base_url = url.Substring(0, slash_loc + 1);
        folder = dir;
    }

    /// <summary>
    /// Downloads the resource at <paramref name="URL"/> and returns its
    /// bytes as a string, one character per byte (no charset decoding).
    /// </summary>
    /// <param name="URL">Address of the page to fetch.</param>
    /// <returns>The raw page contents.</returns>
    public string URLtoRawHTML(string URL)
    {
        WebRequest req = WebRequest.Create(URL);

        // A StringBuilder and a block buffer avoid the quadratic cost of
        // appending to a string one byte at a time.
        System.Text.StringBuilder html = new System.Text.StringBuilder();
        byte[] buffer = new byte[4096];
        int count;

        // using-blocks close the response and stream even if reading fails.
        using (WebResponse res = req.GetResponse())
        using (Stream str = res.GetResponseStream())
        {
            while ((count = str.Read(buffer, 0, buffer.Length)) > 0)
            {
                for (int i = 0; i < count; i++)
                {
                    html.Append((char)buffer[i]);
                }
            }
        }

        return html.ToString();
    }

    /// <summary>
    /// Scans the page text for ".gif" and, for each occurrence, collects
    /// the text between the nearest double quote before it and the next
    /// double quote after it.
    /// </summary>
    /// <param name="raw_html">Page text to scan.</param>
    /// <returns>
    /// An ArrayList of strings, in page order. Duplicates are kept.
    /// </returns>
    public ArrayList RawHTMLtoImageList(string raw_html)
    {
        const string patt = ".gif";
        const string quote = "\"";

        int ploc, sloc, eloc, close;
        ArrayList list = new ArrayList();

        ploc = raw_html.IndexOf(patt, 0);
        while (ploc >= 0)
        {
            // First character after the opening quote (0 if there is none).
            sloc = raw_html.LastIndexOf(quote, ploc) + 1;

            close = raw_html.IndexOf(quote, sloc);
            if (close < 0)
            {
                // No closing quote anywhere after this match, so neither
                // this reference nor any later one can be delimited.
                // Stop instead of calling Substring with a negative length.
                break;
            }

            // Last character before the closing quote.
            eloc = close - 1;
            list.Add(raw_html.Substring(sloc, eloc - sloc + 1));

            // Resume at the end of this reference; the match just handled
            // starts before eloc and therefore cannot be found again.
            ploc = raw_html.IndexOf(patt, eloc);
        }

        return list;
    }

    /// <summary>
    /// Downloads every entry of <paramref name="file_list"/> from
    /// base_url + entry and writes it to folder + "/" + entry, with each
    /// '/' in the entry replaced by '_'.
    /// </summary>
    /// <param name="file_list">Image references relative to base_url.</param>
    public void ImageListtoFiles(ArrayList file_list)
    {
        byte[] buffer = new byte[4096];
        int count;

        for (int i = 0; i < file_list.Count; i++)
        {
            string image = Convert.ToString(file_list[i]);

            // Flatten any sub-directory in the reference into the file name.
            string filename = folder + "/" + image.Replace("/", "_");

            WebRequest req = WebRequest.Create(base_url + image);

            // The file is opened only after the request has succeeded, so a
            // failed download does not leave an empty file behind. The
            // using-blocks release all three resources on any exit path.
            using (WebResponse res = req.GetResponse())
            using (Stream str = res.GetResponseStream())
            using (FileStream fs = new FileStream(filename, FileMode.Create))
            {
                while ((count = str.Read(buffer, 0, buffer.Length)) > 0)
                {
                    fs.Write(buffer, 0, count);
                }
            }
        }
    }

    /// <summary>
    /// Prompts for the starting URL and the destination folder, then runs
    /// the three steps: fetch the page, list its images, download them.
    /// </summary>
    public static void Main()
    {
        string url, dir;
        vampirebot vbot;
        string rawHTML;
        ArrayList alist;

        Console.Write("Enter starting URL: ");
        url = Console.ReadLine();
        Console.Write("Destination folder? ");
        dir = Console.ReadLine();

        vbot = new vampirebot(url, dir);

        rawHTML = vbot.URLtoRawHTML(url);
        alist = vbot.RawHTMLtoImageList(rawHTML);
        vbot.ImageListtoFiles(alist);
    }
}

Note that we defined two variables to hold the methods' return values: rawHTML (a string) and alist (an ArrayList). Our completed vampire bot is shown in Listing 72.

Listing 72—Completed Vampire Bot

using System;
using System.Net;
using System.IO;
using System.Collections;

/// <summary>
/// The completed "vampire bot": it fetches one web page, lists the quoted
/// references on it that contain ".gif", and saves those images into a
/// local folder.
/// </summary>
class vampirebot
{
    // base_url: the starting URL cut off after its last '/'; image
    //           references are appended to it when downloading.
    // folder:   the destination folder exactly as the user typed it.
    string base_url, folder;

    /// <summary>
    /// Remembers the destination folder and derives the base URL from the
    /// starting URL.
    /// </summary>
    /// <param name="url">Starting URL entered by the user.</param>
    /// <param name="dir">Destination folder entered by the user.</param>
    vampirebot(string url, string dir)
    {
        int slash_loc;

        // Everything up to and including the final '/' is the base URL.
        // For an address with no page part (e.g. "http://host") the final
        // '/' is the one in "http://", so the base URL becomes "http://".
        slash_loc = url.LastIndexOf("/");
        base_url = url.Substring(0, slash_loc + 1);
        folder = dir;
    }

    /// <summary>
    /// Downloads the resource at <paramref name="URL"/> and returns its
    /// bytes as a string, one character per byte (no charset decoding).
    /// </summary>
    /// <param name="URL">Address of the page to fetch.</param>
    /// <returns>The raw page contents.</returns>
    public string URLtoRawHTML(string URL)
    {
        WebRequest req = WebRequest.Create(URL);

        // A StringBuilder and a block buffer avoid the quadratic cost of
        // appending to a string one byte at a time.
        System.Text.StringBuilder html = new System.Text.StringBuilder();
        byte[] buffer = new byte[4096];
        int count;

        // using-blocks close the response and stream even if reading fails.
        using (WebResponse res = req.GetResponse())
        using (Stream str = res.GetResponseStream())
        {
            while ((count = str.Read(buffer, 0, buffer.Length)) > 0)
            {
                for (int i = 0; i < count; i++)
                {
                    html.Append((char)buffer[i]);
                }
            }
        }

        return html.ToString();
    }

    /// <summary>
    /// Scans the page text for ".gif" and, for each occurrence, collects
    /// the text between the nearest double quote before it and the next
    /// double quote after it.
    /// </summary>
    /// <param name="raw_html">Page text to scan.</param>
    /// <returns>
    /// An ArrayList of strings, in page order. Duplicates are kept.
    /// </returns>
    public ArrayList RawHTMLtoImageList(string raw_html)
    {
        const string patt = ".gif";
        const string quote = "\"";

        int ploc, sloc, eloc, close;
        ArrayList list = new ArrayList();

        ploc = raw_html.IndexOf(patt, 0);
        while (ploc >= 0)
        {
            // First character after the opening quote (0 if there is none).
            sloc = raw_html.LastIndexOf(quote, ploc) + 1;

            close = raw_html.IndexOf(quote, sloc);
            if (close < 0)
            {
                // No closing quote anywhere after this match, so neither
                // this reference nor any later one can be delimited.
                // Stop instead of calling Substring with a negative length.
                break;
            }

            // Last character before the closing quote.
            eloc = close - 1;
            list.Add(raw_html.Substring(sloc, eloc - sloc + 1));

            // Resume at the end of this reference; the match just handled
            // starts before eloc and therefore cannot be found again.
            ploc = raw_html.IndexOf(patt, eloc);
        }

        return list;
    }

    /// <summary>
    /// Downloads every entry of <paramref name="file_list"/> from
    /// base_url + entry and writes it to folder + "/" + entry, with each
    /// '/' in the entry replaced by '_'.
    /// </summary>
    /// <param name="file_list">Image references relative to base_url.</param>
    public void ImageListtoFiles(ArrayList file_list)
    {
        byte[] buffer = new byte[4096];
        int count;

        for (int i = 0; i < file_list.Count; i++)
        {
            string image = Convert.ToString(file_list[i]);

            // Flatten any sub-directory in the reference into the file name.
            string filename = folder + "/" + image.Replace("/", "_");

            WebRequest req = WebRequest.Create(base_url + image);

            // The file is opened only after the request has succeeded, so a
            // failed download does not leave an empty file behind. The
            // using-blocks release all three resources on any exit path.
            using (WebResponse res = req.GetResponse())
            using (Stream str = res.GetResponseStream())
            using (FileStream fs = new FileStream(filename, FileMode.Create))
            {
                while ((count = str.Read(buffer, 0, buffer.Length)) > 0)
                {
                    fs.Write(buffer, 0, count);
                }
            }
        }
    }

    /// <summary>
    /// Prompts for the starting URL and the destination folder, then runs
    /// the three steps: fetch the page, list its images, download them.
    /// </summary>
    public static void Main()
    {
        string url, dir;
        vampirebot vbot;
        string rawHTML;
        ArrayList alist;

        Console.Write("Enter starting URL: ");
        url = Console.ReadLine();
        Console.Write("Destination folder? ");
        dir = Console.ReadLine();

        vbot = new vampirebot(url, dir);

        rawHTML = vbot.URLtoRawHTML(url);
        alist = vbot.RawHTMLtoImageList(rawHTML);
        vbot.ImageListtoFiles(alist);
    }
}

That's it! Clearly there's plenty of room for improving this basic vampire bot, which only reads GIF files from a single web page. One change you could make is to have it read JPG and other image file formats. Another change is to have the bot traverse the other links on the page once it has finished downloading all the images on the current web page. Finally, there is plenty of error-handling code you could add. For example, if the user specifies a web site address without a web page, such as http://www.professorf.com instead of http://www.professorf.com/planets.html, the last slash in the address is the one in "http://", so base_url is calculated as "http://" rather than as the site address, and the calculation has to change to handle this case. For an easy change, try making the bot print out the name of the image that it's currently downloading.

If you would like these topics covered in a future article, let us know. Of course, we prefer that you figure them out yourselves, as this is the point of code improvisation and recreational programming in general. Until next time, keep on coding and having fun while you do so!

  • + Share This
  • 🔖 Save To Your Account