-
Bug
-
Resolution: Not an Issue
-
P4
-
None
-
10.0.1
A DESCRIPTION OF THE PROBLEM :
The program is packaged as a .jar file, and the PDF file and the page numbers to be read are passed as command-line arguments. For a PDF file with only one page it works; otherwise it fails. This looks like a problem with the library.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Package the program as a jar file and run it from the command line: java -jar BoomPdf.jar test.pdf 2 3, where BoomPdf.jar is the packaged Java program, test.pdf is the input PDF file, 2 is the first page to be processed/scanned, and 3 is the last page.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Should scan the expected pages and display the output
ACTUAL -
It looks like only the first page is scanned, regardless of the input. The library is open source; might it contain errors?
---------- BEGIN SOURCE ----------
package p1;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
/**
 * Example showing how to get the x/y coordinates and size of each character in a PDF.
 *
 * <p>The class extends {@link PDFTextStripper} and overrides
 * {@link #writeString(String, List)} so that every {@link TextPosition} handed to it is
 * printed to standard output instead of being written to the supplied writer.
 */
public class GetCharLocationAndSize extends PDFTextStripper {

    /**
     * Creates the stripper. The body is empty; only the implicit superclass constructor runs.
     *
     * @throws IOException declared to match the superclass constructor
     */
    public GetCharLocationAndSize() throws IOException {
    }

    /**
     * Prints the character positions for a range of pages of a PDF file.
     *
     * @param args {@code args[0]} is the path of the PDF file, {@code args[1]} the first page
     *             to process and {@code args[2]} the last page to process
     * @throws IOException              If there is an error parsing the document.
     * @throws IllegalArgumentException if fewer than three arguments are given, a page number
     *                                  is not an integer, or the end page precedes the start page
     */
    public static void main(String[] args) throws IOException {
        // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: GetCharLocationAndSize <pdf-file> <start-page> <end-page>");
        }

        String fileName = args[0];
        int startPage = Integer.parseInt(args[1]);
        int endPage = Integer.parseInt(args[2]);

        // An inverted range would otherwise produce no output at all, with no explanation.
        if (endPage < startPage) {
            throw new IllegalArgumentException(
                    "end-page (" + endPage + ") must not be less than start-page ("
                            + startPage + ")");
        }

        // try-with-resources closes both the document and the writer, even on failure.
        // The writer is a throw-away sink: the real output is printed by writeString().
        try (PDDocument document = PDDocument.load(new File(fileName));
                Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream())) {
            PDFTextStripper stripper = new GetCharLocationAndSize();
            stripper.setSortByPosition(true);
            stripper.setStartPage(startPage);
            stripper.setEndPage(endPage);
            stripper.writeText(document, dummy);
        }
    }

    /**
     * Override the default functionality of PDFTextStripper.writeString().
     *
     * <p>Prints one line per {@link TextPosition} containing its Unicode text, its
     * direction-adjusted X and Y values, its height and its direction-adjusted width.
     * Nothing is written to the stripper's output writer.
     *
     * @param string        the text chunk being written (not used here)
     * @param textPositions the positions of the characters making up {@code string}
     * @throws IOException declared by the overridden method; not thrown by this implementation
     */
    @Override
    protected void writeString(String string, List<TextPosition> textPositions)
            throws IOException {
        for (TextPosition text : textPositions) {
            System.out.println(text.getUnicode() + " [(X=" + text.getXDirAdj() + ",Y="
                    + text.getYDirAdj() + ") height=" + text.getHeightDir() + " width="
                    + text.getWidthDirAdj() + "]");
        }
    }
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
No Workarounds
The program is packaged as a .jar file, and the PDF file and the page numbers to be read are passed as command-line arguments. For a PDF file with only one page it works; otherwise it fails. This looks like a problem with the library.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Package the program as a jar file and run it from the command line: java -jar BoomPdf.jar test.pdf 2 3, where BoomPdf.jar is the packaged Java program, test.pdf is the input PDF file, 2 is the first page to be processed/scanned, and 3 is the last page.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Should scan the expected pages and display the output
ACTUAL -
It looks like only the first page is scanned, regardless of the input. The library is open source; might it contain errors?
---------- BEGIN SOURCE ----------
package p1;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
/**
 * Example showing how to get the x/y coordinates and size of each character in a PDF.
 *
 * <p>The class extends {@link PDFTextStripper} and overrides
 * {@link #writeString(String, List)} so that every {@link TextPosition} handed to it is
 * printed to standard output instead of being written to the supplied writer.
 */
public class GetCharLocationAndSize extends PDFTextStripper {

    /**
     * Creates the stripper. The body is empty; only the implicit superclass constructor runs.
     *
     * @throws IOException declared to match the superclass constructor
     */
    public GetCharLocationAndSize() throws IOException {
    }

    /**
     * Prints the character positions for a range of pages of a PDF file.
     *
     * @param args {@code args[0]} is the path of the PDF file, {@code args[1]} the first page
     *             to process and {@code args[2]} the last page to process
     * @throws IOException              If there is an error parsing the document.
     * @throws IllegalArgumentException if fewer than three arguments are given, a page number
     *                                  is not an integer, or the end page precedes the start page
     */
    public static void main(String[] args) throws IOException {
        // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: GetCharLocationAndSize <pdf-file> <start-page> <end-page>");
        }

        String fileName = args[0];
        int startPage = Integer.parseInt(args[1]);
        int endPage = Integer.parseInt(args[2]);

        // An inverted range would otherwise produce no output at all, with no explanation.
        if (endPage < startPage) {
            throw new IllegalArgumentException(
                    "end-page (" + endPage + ") must not be less than start-page ("
                            + startPage + ")");
        }

        // try-with-resources closes both the document and the writer, even on failure.
        // The writer is a throw-away sink: the real output is printed by writeString().
        try (PDDocument document = PDDocument.load(new File(fileName));
                Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream())) {
            PDFTextStripper stripper = new GetCharLocationAndSize();
            stripper.setSortByPosition(true);
            stripper.setStartPage(startPage);
            stripper.setEndPage(endPage);
            stripper.writeText(document, dummy);
        }
    }

    /**
     * Override the default functionality of PDFTextStripper.writeString().
     *
     * <p>Prints one line per {@link TextPosition} containing its Unicode text, its
     * direction-adjusted X and Y values, its height and its direction-adjusted width.
     * Nothing is written to the stripper's output writer.
     *
     * @param string        the text chunk being written (not used here)
     * @param textPositions the positions of the characters making up {@code string}
     * @throws IOException declared by the overridden method; not thrown by this implementation
     */
    @Override
    protected void writeString(String string, List<TextPosition> textPositions)
            throws IOException {
        for (TextPosition text : textPositions) {
            System.out.println(text.getUnicode() + " [(X=" + text.getXDirAdj() + ",Y="
                    + text.getYDirAdj() + ") height=" + text.getHeightDir() + " width="
                    + text.getWidthDirAdj() + "]");
        }
    }
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
No Workarounds