public class SimplePostTool extends Object
Modifier and Type | Class and Description |
---|---|
static class |
SimplePostTool.BAOS |
static class |
SimplePostTool.PageFetcherResult
Utility class to hold the result from a page fetch
|
Constructor and Description |
---|
SimplePostTool() |
SimplePostTool(String mode,
URL url,
boolean auto,
String type,
String format,
int recursive,
int delay,
String fileTypes,
OutputStream out,
boolean commit,
boolean optimize,
String[] args)
Constructor which takes in all mandatory input for the tool to work.
|
Modifier and Type | Method and Description |
---|---|
static String |
appendParam(String url,
String param)
Appends a URL query parameter to a URL
|
protected static URL |
appendUrlPath(URL url,
String append)
Appends to the path of the URL
|
void |
commit()
Does a simple commit operation
|
protected String |
computeFullUrl(URL baseUrl,
String link)
Computes the full URL based on a base url and a possibly relative link found
in the href param of an HTML anchor.
|
void |
doGet(String url)
Performs a simple get on the given URL
|
void |
doGet(URL url)
Performs a simple get on the given URL
|
void |
execute()
After initialization, call execute to start the post job.
|
FileFilter |
getFileFilterFromFileTypes(String fileTypes) |
static NodeList |
getNodesFromXP(Node n,
String xpath)
Gets all nodes matching an XPath
|
static String |
getXP(Node n,
String xpath,
boolean concatAll)
Gets the string content of the nodes matching an XPath
|
protected static String |
guessType(File file)
Guesses the type of a file, based on file name suffix
Returns "application/octet-stream" if no corresponding mimeMap type.
|
static ByteBuffer |
inputStreamToByteArray(InputStream is) |
static ByteBuffer |
inputStreamToByteArray(InputStream is,
long maxSize)
Reads an input stream into a byte array
|
protected static boolean |
isOn(String property)
Tests if a string is either "true", "on", "yes" or "1"
|
static void |
main(String[] args)
See usage() for valid command line usage
|
static Document |
makeDom(byte[] in)
Takes a byte array as input and returns a DOM
|
protected static String |
normalizeUrlEnding(String link)
Normalizes a URL string by removing anchor part and trailing slash
|
void |
optimize()
Does a simple optimize operation
|
protected static SimplePostTool |
parseArgsAndInit(String[] args)
Parses incoming arguments and system params and initializes the tool
|
boolean |
postData(InputStream data,
Long length,
OutputStream output,
String type,
URL url)
Reads data from the data stream and posts it to Solr,
writes the response to output
|
void |
postFile(File file,
OutputStream output,
String type)
Opens the file and posts its contents to the solrUrl,
writes the response to output.
|
int |
postFiles(File[] files,
int startIndexInArgs,
OutputStream out,
String type)
Post all files provided in the files array
|
int |
postFiles(String[] args,
int startIndexInArgs,
OutputStream out,
String type)
Post all filenames provided in args
|
int |
postWebPages(String[] args,
int startIndexInArgs,
OutputStream out)
This method takes as input a list of start URL strings for crawling,
adds each one to a backlog and then starts crawling
|
static InputStream |
stringToStream(String s)
Converts a string to an input stream
|
protected boolean |
typeSupported(String type)
Uses the mime-type map to reverse lookup whether the file ending for our type
is supported by the fileTypes option
|
protected int |
webCrawl(int level,
OutputStream out)
A very simple crawler, pulling URLs to fetch from a backlog and then
recurses N levels deep if recursive>0.
|
public SimplePostTool(String mode, URL url, boolean auto, String type, String format, int recursive, int delay, String fileTypes, OutputStream out, boolean commit, boolean optimize, String[] args)
mode
- whether to post files, web pages, params or stdin
url
- the Solr base Url to post to, should end with /update
auto
- if true, we'll guess type and add resourcename/url
type
- content-type of the data you are posting
recursive
- number of levels for file/web mode, or 0 if one file only
delay
- if recursive then delay will be the wait time between posts
fileTypes
- a comma separated list of file-name endings to accept for file/web
out
- an OutputStream to write output to, e.g. stdout to print to console
commit
- if true, will commit at end of posting
optimize
- if true, will optimize at end of posting
args
- a String[] of arguments, varies between modes
public SimplePostTool()
public static void main(String[] args)
args
- the params on the command line
public void execute()
protected static SimplePostTool parseArgsAndInit(String[] args)
args
- the incoming cmd line args
public int postFiles(String[] args, int startIndexInArgs, OutputStream out, String type)
args
- array of file names
startIndexInArgs
- offset to start
out
- output stream to post data to
type
- default content-type to use when posting (may be overridden in auto mode)
public int postFiles(File[] files, int startIndexInArgs, OutputStream out, String type)
files
- array of Files
startIndexInArgs
- offset to start
out
- output stream to post data to
type
- default content-type to use when posting (may be overridden in auto mode)
public int postWebPages(String[] args, int startIndexInArgs, OutputStream out)
args
- the raw input args from main()
startIndexInArgs
- offset for where to start
out
- outputStream to write results to
protected static String normalizeUrlEnding(String link)
protected int webCrawl(int level, OutputStream out)
level
- which level to crawl
out
- output stream to write to
public static ByteBuffer inputStreamToByteArray(InputStream is) throws IOException
IOException
public static ByteBuffer inputStreamToByteArray(InputStream is, long maxSize) throws IOException
is
- the input stream
IOException
- If there is a low-level I/O error.
protected String computeFullUrl(URL baseUrl, String link)
baseUrl
- the base url from where the link was found
link
- the absolute or relative link
protected boolean typeSupported(String type)
type
- what content-type to lookup
protected static boolean isOn(String property)
property
- the string to test
public void commit()
public void optimize()
public static String appendParam(String url, String param)
url
- the original URL
param
- the parameter(s) to append, separated by "&"
public void postFile(File file, OutputStream output, String type)
protected static URL appendUrlPath(URL url, String append) throws MalformedURLException
url
- the URL
append
- the path to append
MalformedURLException
protected static String guessType(File file)
file
- the file
public void doGet(String url)
public void doGet(URL url)
public boolean postData(InputStream data, Long length, OutputStream output, String type, URL url)
public static InputStream stringToStream(String s)
s
- the string
public FileFilter getFileFilterFromFileTypes(String fileTypes)
public static NodeList getNodesFromXP(Node n, String xpath) throws XPathExpressionException
XPathExpressionException
public static String getXP(Node n, String xpath, boolean concatAll) throws XPathExpressionException
n
- the node (or doc)
xpath
- the xpath string
concatAll
- if true, text from all matching nodes will be concatenated, else only the first returned
XPathExpressionException
public static Document makeDom(byte[] in) throws SAXException, IOException, ParserConfigurationException
Copyright © 2000-2019 Apache Software Foundation. All Rights Reserved.