|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  sofie.parsing.Parser
public class Parser
Class Parser. This class is part of the SOFIE system (http://mpii.de/yago-naga/sofie). It is licensed under the Creative Commons Attribution-Noncommercial-Share-Alike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/) by Fabian M. Suchanek (http://suchanek.name). If you use this class for scientific purposes, please cite Fabian M. Suchanek, Mauro Sozio, Gerhard Weikum: "SOFIE: A Self-Organizing Framework for Information Extraction" (International World Wide Web Conference 2009). This class parses the natural language documents that serve as input for SOFIE.
Field Summary | |
---|---|
protected static java.util.Set<java.lang.String> |
commonWords
Contains all words from WordNet |
static int |
DOMAIN
Constant for interestingTypes() |
static int |
RANGE
Constant for interestingTypes() |
protected java.util.Map<java.lang.String,java.lang.Integer> |
wordsOfCurrentDocument
Stores all words of the current document with their frequency |
Constructor Summary | |
---|---|
Parser(java.io.File iniFile,
boolean keepPatterns)
Parses the files |
Method Summary | |
---|---|
void |
close()
|
static java.util.Map<java.lang.String,java.lang.Integer> |
collectWordsFromTokens(java.util.List<Token> tokens)
Collects all words in the current document |
java.util.Collection<java.lang.String> |
disambiguate(Token token1)
Determines whether the token is ambiguous, unambiguous or void. |
void |
findPatterns(java.util.List<Token> tokens)
Finds and stores the patterns from a list of tokens |
protected void |
generateDisambiguationPrior(Token token,
java.util.Set<java.lang.String> meanings)
Generates the disambiguation prior and stores it in the database |
static boolean |
interestingTypes(Token entity,
int domainRange)
Checks whether the pair of entities is interesting. |
static boolean |
isCommonword(java.lang.String w)
Tells whether the given word is a common word |
boolean |
isPronoun(java.lang.String s)
Tells whether a word is a pronoun |
static void |
loadWordNet(java.io.File folder)
Loads the words of WordNet |
static void |
main(java.lang.String[] args)
Finds patterns in a file |
protected java.lang.String |
means(java.lang.String table,
java.lang.String arg1,
java.lang.String arg2)
Returns the string "table.relation='means' AND table.arg1=... |
java.lang.String |
nameOfEntity(java.lang.String y)
Returns a normal word for a Yago identifier |
java.util.Set<java.lang.String> |
ontologicalContext(java.lang.String entity)
Returns the context of an entity from the database |
void |
parse(java.util.List<java.lang.String> filenames,
int start)
|
protected void |
replaceFamilyName(Token token,
java.util.List<Token> tokens)
Checks whether an entity is a person and redirects the family name |
void |
resetPatterns()
Resets all patterns |
void |
saveBinaryPattern(java.util.List<Token> tokens,
int start,
int end)
Checks and stores a binary pattern to the database |
void |
savePattern(java.lang.String simplifiedPattern,
java.lang.String arg1,
java.lang.String arg2)
Stores a pattern in the database |
java.lang.String |
simplifiedPattern(java.util.List<Token> tokens,
int start,
int end)
Returns a String representation of a pattern (excluding start and end) |
protected int |
update(java.lang.String sql)
Executes a database update. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected static java.util.Set<java.lang.String> commonWords
public static final int DOMAIN
public static final int RANGE
protected java.util.Map<java.lang.String,java.lang.Integer> wordsOfCurrentDocument
Constructor Detail |
---|
public Parser(java.io.File iniFile, boolean keepPatterns) throws java.lang.Exception
java.lang.Exception
Method Detail |
---|
protected java.lang.String means(java.lang.String table, java.lang.String arg1, java.lang.String arg2)
protected int update(java.lang.String sql) throws java.sql.SQLException
java.sql.SQLException
public boolean isPronoun(java.lang.String s)
public java.lang.String nameOfEntity(java.lang.String y)
public static void loadWordNet(java.io.File folder)
public static boolean isCommonword(java.lang.String w)
public static boolean interestingTypes(Token entity, int domainRange)
protected void replaceFamilyName(Token token, java.util.List<Token> tokens) throws java.sql.SQLException
java.sql.SQLException
public java.util.Set<java.lang.String> ontologicalContext(java.lang.String entity) throws java.sql.SQLException
java.sql.SQLException
protected void generateDisambiguationPrior(Token token, java.util.Set<java.lang.String> meanings) throws java.sql.SQLException
java.sql.SQLException
public java.util.Collection<java.lang.String> disambiguate(Token token1) throws java.sql.SQLException
java.sql.SQLException
public java.lang.String simplifiedPattern(java.util.List<Token> tokens, int start, int end)
public void savePattern(java.lang.String simplifiedPattern, java.lang.String arg1, java.lang.String arg2) throws java.sql.SQLException
java.sql.SQLException
public void saveBinaryPattern(java.util.List<Token> tokens, int start, int end) throws java.sql.SQLException
java.sql.SQLException
public static java.util.Map<java.lang.String,java.lang.Integer> collectWordsFromTokens(java.util.List<Token> tokens)
public void findPatterns(java.util.List<Token> tokens) throws java.sql.SQLException
java.sql.SQLException
public void resetPatterns() throws java.sql.SQLException
java.sql.SQLException
public void parse(java.util.List<java.lang.String> filenames, int start) throws java.lang.Exception
java.lang.Exception
public void close()
public static void main(java.lang.String[] args) throws java.lang.Exception
java.lang.Exception
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |