// AddImgHW
// AddImgHW.java - Add Image Height & Width 0.9 96/02/16
//
// A Java application by Michael Heinrichs (heinrica@cs.sfu.ca)
// Permission to use, copy, modify, and distribute this software
// for NON-COMMERCIAL purposes without fee is hereby granted.
// If you modify and/or extend this program and distribute the
// result, please inform me (at the above email address), and
// mention my name and email address in the resulting program.
//
// Purpose: Add HEIGHT and WIDTH parameters to IMG tags in HTML documents.
// Usage: java AddImgHW <source-URL> [destination-file]
//
// Method: The application reads in the HTML character by character and
// sends the characters to output. If an IMG tag is encountered,
// it is not output, but is further processed. The IMG tag is
// checked for the presence of HEIGHT and WIDTH parameters. If
// both of the parameters are already present, the IMG tag is
// output with no modification. If either parameter is missing,
// the URL for the image is accessed.
// If the image is inaccessible for whatever reason, the IMG tag
// is output with no modification.
// The dimensions of the image are retrieved, and the HEIGHT and/or
// WIDTH parameters are added to the IMG tag, which is then output.
// Error messages are printed to the System.err (stderr) stream.
//
// Limitations: The specified URL works best if it uses the HTTP protocol.
// Using the FILE protocol should be possible, but it is awkward.
// Be aware of the limitations of relative paths with the FILE
// protocol.
//
// The program assumes for the most part that it is handling valid
// HTML. The program makes generous allowances for the positions
// of whitespace (as do the HTML specs), and will complain about
// some glaring errors. There are probably numerous methods of
// 'fooling' the program into making a mistake (IMG tags inside
// comments and/or Javascript might do weird things). Don't bother
// trying to break the program. It's breakable. I didn't try to
// write a complete HTML parser here.
//
// Bugs: Yes, there are probably some bugs in here somewhere. If you
// think you've found one, please let me know, and I'll fix it.
// (Can't promise you prizes like Netscape though)
import java.io.*;
import java.net.*;
import java.awt.*;
import java.util.Hashtable;
/**
 * Copies an HTML document from a URL to an output stream, adding HEIGHT and
 * WIDTH parameters to every IMG tag that lacks them.
 *
 * The document is treated as a stream of bytes: everything except the
 * inserted parameters is copied through byte-for-byte, so the document's
 * character encoding is never altered. Image sizes are read from the image
 * files themselves (GIF, JPEG and PNG headers are understood). An IMG tag
 * whose image cannot be located or understood is output unmodified.
 */
class AddImgHW {
    /**
     * Cache of the image sizes looked up so far, so that an image repeated
     * several times (bullets, lines, etc.) is only retrieved once. Keys are
     * the String form of the image URL (URL objects are avoided as keys
     * because URL.hashCode()/equals() may perform blocking host lookups);
     * values are Dimension objects, or UNKNOWN_SIZE for failed lookups.
     */
    static Hashtable imageURLs = new Hashtable(40);

    /**
     * Cached in place of a size when an image could not be measured.
     * Hashtable does not accept null values, so a sentinel is required.
     */
    private static final Dimension UNKNOWN_SIZE = new Dimension(-1, -1);

    /** Tag name searched for by dumpUntilImg (matched case-insensitively). */
    private static final String IMG_NAME = "img";

    /**
     * Program entry point.
     *
     * @param args args[0] is the URL of the HTML document to read; the
     *             optional args[1] names the file to write. When args[1]
     *             is omitted the result is written to System.out.
     */
    public static void main(String args[]) {
        if (args.length == 0) {
            usage();
            System.exit(1);
        }
        boolean writeFailed = false;
        try {
            URL inURL = new URL(args[0]);
            BufferedInputStream bufinStream = new BufferedInputStream(inURL.openStream());
            PrintStream outStream = System.out;
            boolean closeOutStream = false;
            try {
                if (args.length > 1) {
                    outStream = new PrintStream(
                            new BufferedOutputStream(new FileOutputStream(args[1])));
                    closeOutStream = true;
                }
                process(inURL, bufinStream, outStream);
                // PrintStream never throws on write errors; ask it explicitly.
                writeFailed = outStream.checkError();
            } catch (IOException e) {
                System.err.println("Bad destination filename.");
                usage();
                System.err.println(e.getMessage());
                e.printStackTrace(System.err);
                System.exit(1);
            } finally {
                if (closeOutStream) {
                    outStream.close();
                }
                bufinStream.close();
            }
        } catch (MalformedURLException e) {
            System.err.println("The source URL is malformed.");
            usage();
            System.err.println(e.getMessage());
            e.printStackTrace(System.err);
            System.exit(1);
        } catch (IOException e) {
            System.err.println("Unable to connect to " + args[0]);
            System.err.println(e.getMessage());
            e.printStackTrace(System.err);
            System.exit(1);
        }
        if (writeFailed) {
            System.err.println("Error while writing the output.");
            System.exit(1);
        }
    }

    /**
     * Copies the whole document from in to out, completing IMG tags on the way.
     *
     * @param inURL URL of the document; relative image URLs are resolved against it
     * @param in    the document to read
     * @param out   where the processed document is written; flushed before returning
     */
    public static void process(URL inURL, BufferedInputStream in, PrintStream out) {
        String img_spec;
        while ((img_spec = dumpUntilImg(in, out)) != null) {
            if (!paramExists(img_spec, "height") || !paramExists(img_spec, "width")) {
                img_spec = addHW(inURL, img_spec);
            }
            writeRaw(out, img_spec);
        }
        // Single-byte writes are not auto-flushed, so flush explicitly.
        out.flush();
    }

    /**
     * Copies bytes from in to out until an IMG tag is found.
     *
     * @param in  the document to read
     * @param out receives everything that precedes the IMG tag, unchanged
     * @return the complete IMG tag (from '&lt;' up to and including '&gt;', or
     *         up to end of input if the tag is unterminated), which is NOT
     *         written to out; null when the end of input is reached (or an
     *         I/O error occurs) without finding another IMG tag
     */
    public static String dumpUntilImg(BufferedInputStream in, PrintStream out) {
        try {
            int b;
            while ((b = in.read()) != -1) {
                if (b != '<') {
                    out.write(b);
                    continue;
                }
                StringBuffer tag = new StringBuffer("<");
                if (readImgPrefix(in, tag)) {
                    tag.append(getToEnd(in));
                    return tag.toString();
                }
                // Not an IMG tag: pass through what was consumed so far.
                writeRaw(out, tag.toString());
            }
        } catch (IOException e) {
            System.err.println("IO error in dumpUntilImg");
        }
        return null;
    }

    /**
     * Reads the characters following a '&lt;' and decides whether they start
     * an IMG tag (optional whitespace, then "img" in any letter case).
     * Every byte consumed is appended to tag. A second '&lt;' is pushed back
     * rather than consumed so that it can start a tag of its own.
     *
     * @return true if the tag name "img" was read completely
     */
    private static boolean readImgPrefix(BufferedInputStream in, StringBuffer tag)
            throws IOException {
        int matched = 0;
        while (true) {
            in.mark(1);
            int b = in.read();
            if (b == -1) {
                return false;
            }
            if (b == '<') {
                in.reset();
                return false;
            }
            char ch = (char) b;
            tag.append(ch);
            if (matched == 0 && isHtmlSpace(ch)) {
                continue; // whitespace is tolerated between '<' and the name
            }
            if (Character.toLowerCase(ch) != IMG_NAME.charAt(matched)) {
                return false;
            }
            if (++matched == IMG_NAME.length()) {
                return true;
            }
        }
    }

    /**
     * Reads the remainder of a tag.
     *
     * @param in the document to read
     * @return everything up to and including the next '&gt;', or up to the
     *         end of input if no '&gt;' follows
     * @throws IOException if reading fails
     */
    public static String getToEnd(BufferedInputStream in) throws IOException {
        StringBuffer rest = new StringBuffer();
        int b;
        while ((b = in.read()) != -1) {
            rest.append((char) b);
            if (b == '>') {
                break;
            }
        }
        return rest.toString();
    }

    /**
     * Tells whether a tag carries the given parameter with a value.
     *
     * @param img_spec the complete tag text
     * @param param    parameter name, matched case-insensitively
     * @return true if the tag contains "param = ..." as a parameter of its
     *         own (text inside quoted values of other parameters is ignored)
     */
    public static boolean paramExists(String img_spec, String param) {
        return findParam(img_spec, param) != -1;
    }

    /**
     * Locates a parameter inside a tag.
     *
     * A parameter name is only recognised where a name can start (after
     * whitespace or directly after a closing quote), and only when it is
     * followed by optional whitespace and '='. Quoted values are skipped as
     * a whole so their contents are never mistaken for parameters.
     *
     * @return the index of the '=' that follows the parameter name, or -1
     */
    private static int findParam(String spec, String param) {
        int len = spec.length();
        boolean boundary = false; // may a parameter name start at index i?
        int i = 0;
        while (i < len) {
            char c = spec.charAt(i);
            if (boundary && spec.regionMatches(true, i, param, 0, param.length())) {
                int j = skipSpace(spec, i + param.length());
                if (j < len && spec.charAt(j) == '=') {
                    return j;
                }
            }
            if (c == '=') {
                int v = skipSpace(spec, i + 1);
                if (v < len && (spec.charAt(v) == '"' || spec.charAt(v) == '\'')) {
                    int close = spec.indexOf(spec.charAt(v), v + 1);
                    if (close == -1) {
                        return -1; // unterminated quote: nothing reliable follows
                    }
                    i = close + 1;
                    boundary = true;
                    continue;
                }
            }
            boundary = isHtmlSpace(c);
            i++;
        }
        return -1;
    }

    /** Returns the index of the first non-whitespace character at or after from. */
    private static int skipSpace(String s, int from) {
        int i = from;
        while (i < s.length() && isHtmlSpace(s.charAt(i))) {
            i++;
        }
        return i;
    }

    /** Tells whether ch is one of the whitespace characters allowed inside a tag. */
    private static boolean isHtmlSpace(char ch) {
        return ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f';
    }

    /**
     * Writes a string as raw bytes, one byte per character (low-order eight
     * bits). All tag text is built from input bytes plus ASCII insertions,
     * so this reproduces the input bytes exactly regardless of the
     * platform's default character encoding.
     */
    private static void writeRaw(PrintStream out, String s) {
        for (int i = 0; i < s.length(); i++) {
            out.write(s.charAt(i));
        }
    }

    /**
     * Adds the missing HEIGHT and/or WIDTH parameters to an IMG tag.
     *
     * @param inURL    URL of the document containing the tag
     * @param img_spec the complete IMG tag
     * @return the completed tag, or img_spec itself when the image's URL
     *         cannot be determined or its size cannot be read
     */
    public static String addHW(URL inURL, String img_spec) {
        URL img_file;
        try {
            img_file = getImgURL(inURL, img_spec);
        } catch (MalformedURLException e) {
            System.err.println("Malformed HTML: " + e.getMessage());
            return img_spec;
        }
        String key = img_file.toExternalForm();
        Dimension dim = (Dimension) imageURLs.get(key);
        if (dim == null) {
            dim = getImgDimensions(img_file);
            // Remember failures too, so a broken image is only tried once.
            imageURLs.put(key, dim == null ? UNKNOWN_SIZE : dim);
        }
        if (dim == null || dim == UNKNOWN_SIZE) {
            return img_spec;
        }
        return insertHW(img_spec, dim);
    }

    /**
     * Inserts whichever of HEIGHT and WIDTH the tag does not already have.
     *
     * @param img_spec the complete IMG tag
     * @param dim      the size of the image
     * @return the tag with the missing parameters appended
     */
    public static String insertHW(String img_spec, Dimension dim) {
        String rtn = img_spec;
        if (!paramExists(img_spec, "height")) {
            rtn = insertParam(rtn, "HEIGHT=" + dim.height);
        }
        if (!paramExists(img_spec, "width")) {
            rtn = insertParam(rtn, "WIDTH=" + dim.width);
        }
        return rtn;
    }

    /**
     * Inserts a parameter just before the end of a tag.
     *
     * @param img_spec the complete tag, expected to end in '&gt;'
     * @param param    the text to insert, e.g. "HEIGHT=10"
     * @return the tag with " param" inserted before the closing '&gt;' (or
     *         before a self-closing "/&gt;"); img_spec unchanged if it does
     *         not end in '&gt;'
     */
    public static String insertParam(String img_spec, String param) {
        int len = img_spec.length();
        if (len == 0 || img_spec.charAt(len - 1) != '>') {
            return img_spec; // unterminated tag: leave it alone
        }
        if (len >= 3 && img_spec.charAt(len - 2) == '/') {
            // A '/' counts as self-closing syntax only when it cannot be the
            // tail of an unquoted value, i.e. after whitespace or a quote.
            char before = img_spec.charAt(len - 3);
            if (isHtmlSpace(before) || before == '"' || before == '\'') {
                int headEnd = len - 2;
                while (headEnd > 0 && isHtmlSpace(img_spec.charAt(headEnd - 1))) {
                    headEnd--;
                }
                return img_spec.substring(0, headEnd) + " " + param + " />";
            }
        }
        return img_spec.substring(0, len - 1) + " " + param + ">";
    }

    /**
     * Extracts the image URL from the SRC parameter of an IMG tag.
     *
     * The value may be enclosed in double or single quotes, or be unquoted,
     * in which case it ends at the next whitespace character or '&gt;'.
     *
     * @param inURL    URL of the containing document, used to resolve
     *                 relative image URLs
     * @param img_spec the complete IMG tag
     * @return the absolute URL of the image
     * @throws MalformedURLException if the tag has no usable SRC parameter
     *         or its value does not form a valid URL
     */
    public static URL getImgURL(URL inURL, String img_spec) throws MalformedURLException {
        int equals = findParam(img_spec, "src");
        if (equals == -1) {
            throw new MalformedURLException("No SRC parameter");
        }
        int len = img_spec.length();
        int start = skipSpace(img_spec, equals + 1);
        if (start >= len) {
            throw new MalformedURLException("Malformed HTML SRC parameter");
        }
        char first = img_spec.charAt(start);
        int end;
        if (first == '"' || first == '\'') {
            start++;
            end = img_spec.indexOf(first, start);
            if (end == -1) {
                throw new MalformedURLException("No final quote");
            }
        } else {
            end = start;
            while (end < len && !isHtmlSpace(img_spec.charAt(end))
                    && img_spec.charAt(end) != '>') {
                end++;
            }
            if (end == len) {
                throw new MalformedURLException("Unterminated SRC parameter");
            }
        }
        return new URL(inURL, img_spec.substring(start, end));
    }

    /**
     * Retrieves an image and reads its size from the file header.
     *
     * The format is recognised from the first bytes of the data (JPEG and
     * PNG signatures); anything else is handed to the GIF reader, which
     * rejects data that is not a GIF.
     *
     * @param imgURL the location of the image
     * @return the image size, or null if the image cannot be retrieved or
     *         is not in a recognised format
     */
    public static Dimension getImgDimensions(URL imgURL) {
        try {
            InputStream s = new BufferedInputStream(imgURL.openStream());
            try {
                s.mark(2);
                int b0 = s.read();
                int b1 = s.read();
                s.reset();
                if (b0 == 0xFF && b1 == 0xD8) {
                    return getJpgDimensions(s);
                }
                if (b0 == 0x89 && b1 == 'P') {
                    return getPngDimensions(s);
                }
                return getGifDimensions(s);
            } finally {
                s.close();
            }
        } catch (IOException e) {
            // The indicated URL does not exist or cannot be read.
            System.err.println("Unable to read image " + imgURL + ": " + e.getMessage());
            return null;
        }
    }

    /**
     * Reads the size of a GIF image.
     *
     * @param s stream positioned at the first byte of the image
     * @return the logical screen size from the GIF header, or null if the
     *         data does not start with a "GIF" signature or ends too early
     * @throws IOException if reading fails
     */
    public static Dimension getGifDimensions(InputStream s) throws IOException {
        // 6 signature/version bytes, then width and height, little-endian.
        int[] header = new int[10];
        for (int i = 0; i < header.length; i++) {
            header[i] = s.read();
            if (header[i] == -1) {
                return null;
            }
        }
        if (header[0] != 'G' || header[1] != 'I' || header[2] != 'F') {
            return null;
        }
        return new Dimension(header[6] | (header[7] << 8), header[8] | (header[9] << 8));
    }

    /**
     * Reads the size of a PNG image.
     *
     * @param s stream positioned at the first byte of the image
     * @return the size from the IHDR chunk, or null if the data is not a
     *         PNG beginning with an IHDR chunk or ends too early
     * @throws IOException if reading fails
     */
    public static Dimension getPngDimensions(InputStream s) throws IOException {
        int[] signature = {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A};
        for (int i = 0; i < signature.length; i++) {
            if (s.read() != signature[i]) {
                return null;
            }
        }
        if (readBigEndian(s, 4) == -1) { // chunk length, not needed
            return null;
        }
        String type = "IHDR";
        for (int i = 0; i < type.length(); i++) {
            if (s.read() != type.charAt(i)) {
                return null;
            }
        }
        long width = readBigEndian(s, 4);
        long height = readBigEndian(s, 4);
        if (width < 0 || height < 0
                || width > Integer.MAX_VALUE || height > Integer.MAX_VALUE) {
            return null;
        }
        return new Dimension((int) width, (int) height);
    }

    /**
     * Reads the size of a JPEG image.
     *
     * The stream is walked marker segment by marker segment, skipping the
     * payload of every segment that is not a start-of-frame, so marker-like
     * bytes inside other segments (such as embedded thumbnails) are not
     * mistaken for the frame header. All start-of-frame variants (baseline,
     * progressive, ...) are accepted.
     *
     * @param s stream positioned at or before the first marker
     * @return the size from the first start-of-frame segment, or null if
     *         the input ends before one is found
     * @throws IOException if reading fails
     */
    public static Dimension getJpgDimensions(InputStream s) throws IOException {
        int b = s.read();
        while (b != -1) {
            if (b != 0xFF) {
                b = s.read();
                continue;
            }
            int marker = s.read();
            while (marker == 0xFF) { // any number of 0xFF fill bytes may precede a marker
                marker = s.read();
            }
            if (marker == -1) {
                break;
            }
            if (isStandaloneMarker(marker)) {
                b = s.read();
                continue;
            }
            long length = readBigEndian(s, 2); // includes the two length bytes
            if (length == -1) {
                break;
            }
            if (isStartOfFrame(marker)) {
                if (s.read() == -1) { // sample precision, not needed
                    break;
                }
                long height = readBigEndian(s, 2);
                long width = readBigEndian(s, 2);
                if (height == -1 || width == -1) {
                    break;
                }
                return new Dimension((int) width, (int) height);
            }
            if (length < 2 || !skipFully(s, length - 2)) {
                break;
            }
            b = s.read();
        }
        return null;
    }

    /**
     * Tells whether a JPEG marker code has no length field and no payload:
     * 0x00 (a stuffed 0xFF data byte), TEM, RST0-RST7, SOI and EOI.
     */
    private static boolean isStandaloneMarker(int marker) {
        return marker == 0x00 || marker == 0x01 || (marker >= 0xD0 && marker <= 0xD9);
    }

    /**
     * Tells whether a JPEG marker code is a start-of-frame: 0xC0-0xCF
     * except DHT (0xC4), JPG (0xC8) and DAC (0xCC).
     */
    private static boolean isStartOfFrame(int marker) {
        return marker >= 0xC0 && marker <= 0xCF
                && marker != 0xC4 && marker != 0xC8 && marker != 0xCC;
    }

    /**
     * Reads count bytes as one unsigned big-endian number.
     *
     * @return the value, or -1 if the input ends first
     */
    private static long readBigEndian(InputStream s, int count) throws IOException {
        long value = 0;
        for (int i = 0; i < count; i++) {
            int b = s.read();
            if (b == -1) {
                return -1;
            }
            value = (value << 8) | b;
        }
        return value;
    }

    /**
     * Skips exactly count bytes.
     *
     * @return false if the input ends before count bytes were skipped
     */
    private static boolean skipFully(InputStream s, long count) throws IOException {
        while (count > 0) {
            long skipped = s.skip(count);
            if (skipped <= 0) {
                // skip() may legitimately skip nothing; probe with read().
                if (s.read() == -1) {
                    return false;
                }
                skipped = 1;
            }
            count -= skipped;
        }
        return true;
    }

    /** Prints a short description of the command line to System.err. */
    public static void usage() {
        System.err.println("Usage:");
        System.err.println("AddImgHW <source-URL> [destination-file]");
        System.err.println(" If destination file is omitted, output is to screen.");
    }
}
New on the Java Boutique:
New Review:
Time Management Made Easy with the Quartz Enterprise Job Scheduler
Why not just use the Java timer API? This open source scheduling
API boasts simplicity, ease-of-integration, a well-rounded feature
set, and it's free!
New Applet:
Reverse Complement
Reverse Complement is a simple applet that converts DNA or RNA
sequences into three useful formats.
Elsewhere on internet.com:
WebDeveloper Java
Lots of Java information on webdeveloper.com
WDVL Java
Thorough Java resource at the Web Developer's Virtual Library.
ScriptSearch Java
Hundreds of free Java code files to download.
jGuru: Your View of the Java Universe
Customizable portal with online training, FAQs, regular news updates, and tutorials.
|