org.apache.mahout.utils.email
Class MailOptions

java.lang.Object
  extended by org.apache.mahout.utils.email.MailOptions

public class MailOptions
extends Object

Configuration options to be used by MailProcessor. Includes options controlling the exact output format and which mail fields are included (body, to, from, subject, etc.).


Field Summary
static Pattern DEFAULT_QUOTED_TEXT
           
static String FROM
           
static String REFS
           
static String SUBJECT
           
static String TO
           
 
Constructor Summary
MailOptions()
           
 
Method Summary
 String getBodySeparator()
           
 Charset getCharset()
           
 int getChunkSize()
           
 File getInput()
           
 String getOutputDir()
           
 Map<String,Integer> getPatternOrder()
           
 Pattern[] getPatternsToMatch()
           
 String getPrefix()
           
 Pattern getQuotedTextPattern()
           
 String getSeparator()
           
 boolean isIncludeBody()
           
 boolean isStripQuotedText()
           
 void setBodySeparator(String bodySeparator)
          Sets the separator to use in the output between lines in the body; the default is "\n".
 void setCharset(Charset charset)
          Sets the encoding of the input.
 void setChunkSize(int chunkSize)
          Sets the size of each generated sequence file, in Megabytes.
 void setIncludeBody(boolean includeBody)
          Sets whether mail bodies are included in the output.
 void setInput(File input)
           
 void setOutputDir(String outputDir)
          Sets the output directory where sequence files will be written.
 void setPatternOrder(Map<String,Integer> patternOrder)
           
 void setPatternsToMatch(Pattern[] patternsToMatch)
          Sets the list of patterns to be applied in the given order to extract metadata fields (to, from, subject, etc.) from the input.
 void setPrefix(String prefix)
          Sets the prefix that is combined with the archive name and with message ids to create SequenceFile keys.
 void setQuotedTextPattern(Pattern quotedTextPattern)
          Sets the Pattern to use to identify lines that are quoted text.
 void setSeparator(String separator)
          Sets the separator to use in the output between metadata items (to, from, etc.).
 void setStripQuotedText(boolean stripQuotedText)
          Sets whether quoted text such as lines starting with | or > is stripped off.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

FROM

public static final String FROM
See Also:
Constant Field Values

TO

public static final String TO
See Also:
Constant Field Values

REFS

public static final String REFS
See Also:
Constant Field Values

SUBJECT

public static final String SUBJECT
See Also:
Constant Field Values

DEFAULT_QUOTED_TEXT

public static final Pattern DEFAULT_QUOTED_TEXT
Constructor Detail

MailOptions

public MailOptions()
Method Detail

getInput

public File getInput()

setInput

public void setInput(File input)

getOutputDir

public String getOutputDir()

setOutputDir

public void setOutputDir(String outputDir)
Sets the output directory where sequence files will be written.


getPrefix

public String getPrefix()

setPrefix

public void setPrefix(String prefix)
Sets the prefix that is combined with the archive name and with message ids to create SequenceFile keys.

Parameters:
prefix - The name of the directory containing the mail archive is commonly used.

getChunkSize

public int getChunkSize()

setChunkSize

public void setChunkSize(int chunkSize)
Sets the size of each generated sequence file, in Megabytes.


getCharset

public Charset getCharset()

setCharset

public void setCharset(Charset charset)
Sets the encoding of the input.


getSeparator

public String getSeparator()

setSeparator

public void setSeparator(String separator)
Sets the separator to use in the output between metadata items (to, from, etc.).


getBodySeparator

public String getBodySeparator()

setBodySeparator

public void setBodySeparator(String bodySeparator)
Sets the separator to use in the output between lines in the body; the default is "\n".


isIncludeBody

public boolean isIncludeBody()

setIncludeBody

public void setIncludeBody(boolean includeBody)
Sets whether mail bodies are included in the output.


getPatternsToMatch

public Pattern[] getPatternsToMatch()

setPatternsToMatch

public void setPatternsToMatch(Pattern[] patternsToMatch)
Sets the list of patterns to be applied in the given order to extract metadata fields (to, from, subject, etc.) from the input.


getPatternOrder

public Map<String,Integer> getPatternOrder()

setPatternOrder

public void setPatternOrder(Map<String,Integer> patternOrder)

isStripQuotedText

public boolean isStripQuotedText()
Returns:
true if we should strip out quoted email text

setStripQuotedText

public void setStripQuotedText(boolean stripQuotedText)
Sets whether quoted text such as lines starting with | or > is stripped off.


getQuotedTextPattern

public Pattern getQuotedTextPattern()

setQuotedTextPattern

public void setQuotedTextPattern(Pattern quotedTextPattern)
Sets the Pattern to use to identify lines that are quoted text. The default is | and >.

See Also:
setStripQuotedText(boolean)


Copyright © 2008–2014 The Apache Software Foundation. All rights reserved.