|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object javatools.parsers.Name
public class Name
This class is part of the Java Tools (see http://mpii.de/yago-naga/javatools).
It is licensed under the Creative Commons Attribution License
(see http://creativecommons.org/licenses/by/3.0) by
the YAGO-NAGA team (see http://mpii.de/yago-naga).
The class Name represents a name. There are three sub-types (subclasses) of names:
Abbreviations, person names and company names. These subclasses provide methods to
access the components of the name (like the family name). Use the factory method Name.of to
create a Name-object of the appropriate subclass.
Example:
Name.isName("Mouse");
  --> true
Name.isAbbreviation("PMM");
  --> true
Name.isPersonName("Mickey Mouse");
  --> false
Name.couldBePersonName("Mickey Mouse");
  --> true
Name.isPersonName("Prof. Mickey Mouse");
  --> true
Name.of("Prof. Dr. Fabian the Great III of Saarbruecken").describe()
// equivalent to new PersonName(...) in this case
  -->
  PersonName
    Original: Prof. Dr. Fabian the Great III of Saarbruecken
    Titles: Prof. Dr.
    Given Name: Fabian
    Given Names: Fabian
    Family Name Prefix: null
    Attribute Prefix: the
    Family Name: null
    Attribute: Great
    Family Name Suffix: null
    Roman: III
    City: Saarbruecken
    Normalized: Fabian_Great
Nested Class Summary | |
---|---|
static class |
Name.Abbreviation
|
static class |
Name.CompanyName
|
static class |
Name.PersonName
|
Field Summary | |
---|---|
static java.lang.String |
A
Contains characters |
static java.lang.String |
ANYNAME
Holds the general default name |
static java.lang.String |
attributePrefix
Contains attribute Prefixes (like "the" in "Alexander the Great") |
static java.util.regex.Pattern |
attributePrefixPattern
|
static java.lang.String |
B
Contains blank |
static java.lang.String |
BC
Contains blank with optional comma |
static java.lang.String |
BD
Contains a word boundary |
static java.lang.String |
companyNameSuffix
Contains common company name suffixes (like "Inc") |
static java.util.regex.Pattern |
companyNameSuffixPattern
|
static java.lang.String |
DG
Contains digits |
static java.lang.String |
directFamilyNamePrefix
A direct family name prefix (such as "Mc") |
static java.lang.String |
familyName
Name component with an optional familyNamePrefix and postfix |
static java.lang.String |
familyNamePrefix
Contains common family name prefixes (like "von") |
static java.util.regex.Pattern |
familyNamePrefixPattern
|
static java.lang.String |
familyNameSuffix
Contains common name suffixes (like "Junior") |
static java.util.regex.Pattern |
familyNameSuffixPattern
|
static java.lang.String |
givenName
The pattern "Name[-Name]" |
static java.lang.String |
givenNameComponent
The pattern "Name." |
static java.lang.String |
givenNames
The pattern (personNameComponent+B)+ |
static java.lang.String |
H
Contains hyphens |
static java.lang.String |
L
Contains lower case Characters |
static java.util.Map<java.lang.String,java.lang.String> |
languageCodes
Language codes |
static java.util.regex.Pattern |
laxAbbreviationPattern
Contains the lax pattern for abbreviations |
static java.util.regex.Pattern |
laxCompanyPattern
Contains the pattern for companies |
static java.lang.String |
laxName
Contains the pattern for names. |
static java.util.regex.Pattern |
laxNamePattern
Contains the pattern for names. |
static java.util.regex.Pattern |
laxPersonNamePattern
The pattern for person names |
static java.util.Map<java.lang.String,java.lang.String> |
nationality2country
|
static java.lang.String |
nickName
Nickname '...' |
static java.lang.String |
of
Contains "of" |
static java.lang.String |
or
Contains "|" |
static java.lang.String |
personNameComponent
The pattern "Name" |
static java.lang.String |
prep
Contains prepositions |
static java.lang.String |
roman
Contains roman digits |
static java.util.regex.Pattern |
safeAbbreviationPattern
Contains the safe pattern for abbreviations |
static java.util.regex.Pattern |
safeCompanyPattern
Contains the safe pattern for companies |
static java.lang.String |
safeName
Contains a pattern that indicates strings that are very likely to be names |
static java.util.regex.Pattern |
safeNamePattern
Contains a pattern that indicates strings that are very likely to be names |
static java.util.regex.Pattern |
safeNamesPattern
Contains a pattern that indicates strings that are very likely to be names |
static java.util.regex.Pattern |
safeNamesPatternNoPrep
Contains a pattern that indicates strings that are very likely to be names |
static java.lang.String |
safePersonName
The pattern for strings that are person names with high probability |
static java.util.regex.Pattern |
safePersonNamePattern
|
static FinalSet<java.lang.String> |
stopWords
Contains stopwords |
static java.lang.String |
teamName
team name |
static java.util.regex.Pattern |
teamNamePattern
|
static java.lang.String |
title
Contains common titles (like "Mr.") |
static java.util.regex.Pattern |
titlePattern
|
static java.lang.String |
titles
|
static java.util.Set<java.lang.String> |
titlesForGivenNames
Contains those titles that go with the given name |
static java.lang.String |
U
Contains upper case Characters |
static java.util.Map<java.lang.String,java.lang.String> |
usStates
|
Method Summary | |
---|---|
static java.lang.String |
c(java.lang.String s)
Capturing group |
static boolean |
couldBeAbbreviation(java.lang.String word)
Tells whether a string could be an abbreviation. |
static boolean |
couldBeCompanyName(java.lang.String s)
Tells if the string could be a company name |
static boolean |
couldBeName(java.lang.String s)
Tells whether a String could possibly be a name |
static boolean |
couldBePersonName(java.lang.String s)
Returns true if it is possible that the string is a person name |
java.lang.String |
describe()
Returns a description |
static boolean |
isAbbreviation(java.lang.String word)
Tells whether a string is an abbreviation with high probability |
static boolean |
isAttributePrefix(java.lang.String s)
Says whether this String is an attribute Prefix (like "the" in "Alexander the Great") |
static boolean |
isCompanyName(java.lang.String s)
Tells if the string is a company name with high probability |
static boolean |
isCompanyNameSuffix(java.lang.String s)
Says whether this String is a company name suffix |
static boolean |
isFamilyNamePrefix(java.lang.String s)
Says whether this String is a family name prefix |
static boolean |
isLanguage(java.lang.String s)
Returns TRUE for languages |
static boolean |
isLanguageCode(java.lang.String s)
Returns TRUE for language codes |
static boolean |
isName(java.lang.String s)
Tells whether a String is a name with high probability |
static boolean |
isNames(java.lang.String s)
Tells whether a String is a sequence of names with high probability |
static boolean |
isNation(java.lang.String s)
Returns TRUE for nations |
static boolean |
isNationality(java.lang.String s)
Returns TRUE for nationalities |
static boolean |
isPersonName(java.lang.String m)
Returns true if it is highly probable that the string is a person name. |
static boolean |
isPersonNameSuffix(java.lang.String s)
Says whether this String is a person name suffix |
static boolean |
isStopWord(java.lang.String w)
TRUE for stopwords |
static boolean |
isTitle(java.lang.String s)
Says whether this String is a title |
static boolean |
isUSState(java.lang.String s)
Returns TRUE for US States |
static boolean |
isUSStateAbbreviation(java.lang.String s)
Returns TRUE for US State abbreviations |
static java.lang.String |
languageForCode(java.lang.String s)
Returns the language for a code (or NULL) |
static void |
main(java.lang.String[] argv)
Test routine |
static java.lang.String |
mul(java.lang.String s)
Repeats the token with blanks one or more times |
static java.lang.String |
mulHyp(java.lang.String s)
Repeats the token with hyphens one or more times |
static java.lang.String |
nationForNationality(java.lang.String s)
Returns the nation for a nationality (or NULL) |
java.lang.String |
normalize()
Returns the letters and digits of the original name (eliminates punctuation) |
static Name |
of(java.lang.String s)
Factory pattern |
static java.lang.String |
opt(java.lang.String s)
optional component |
static java.lang.String |
optMul(java.lang.String s)
optional multiple component |
static java.lang.String |
or(java.lang.String s1,
java.lang.String s2)
alternative |
java.lang.String |
original()
Returns the original name |
java.lang.String |
toString()
Returns the original name |
static java.lang.String |
unabbreviateUSState(java.lang.String s)
Returns the US state for an abbreviation (or NULL) |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
public static final java.lang.String ANYNAME
public static java.lang.String roman
public static java.lang.String of
public static final java.lang.String U
public static final java.lang.String L
public static final java.lang.String A
public static final java.lang.String B
public static final java.lang.String BD
public static final java.lang.String BC
public static final java.lang.String DG
public static final java.lang.String H
public static final java.lang.String or
public static final java.lang.String familyNamePrefix
public static final java.util.regex.Pattern familyNamePrefixPattern
public static java.lang.String attributePrefix
public static java.util.regex.Pattern attributePrefixPattern
public static final java.lang.String familyNameSuffix
public static final java.util.regex.Pattern familyNameSuffixPattern
public static final java.lang.String title
public static final java.util.regex.Pattern titlePattern
public static final java.lang.String titles
public static java.util.Set<java.lang.String> titlesForGivenNames
public static final java.lang.String companyNameSuffix
public static final java.util.regex.Pattern companyNameSuffixPattern
public static final java.lang.String teamName
public static final java.util.regex.Pattern teamNamePattern
public static final java.lang.String prep
public static final java.lang.String laxName
public static final java.util.regex.Pattern laxNamePattern
public static final java.lang.String safeName
public static final java.util.regex.Pattern safeNamePattern
public static final java.util.regex.Pattern safeNamesPattern
public static final java.util.regex.Pattern safeNamesPatternNoPrep
public static final java.util.regex.Pattern laxAbbreviationPattern
public static final java.util.regex.Pattern safeAbbreviationPattern
public static final java.util.regex.Pattern laxCompanyPattern
public static final java.util.regex.Pattern safeCompanyPattern
public static final java.lang.String directFamilyNamePrefix
public static final java.lang.String personNameComponent
public static final java.lang.String givenNameComponent
public static final java.lang.String givenName
public static final java.lang.String givenNames
public static final java.lang.String familyName
public static final java.lang.String nickName
public static final java.util.regex.Pattern laxPersonNamePattern
public static final java.lang.String safePersonName
public static final java.util.regex.Pattern safePersonNamePattern
public static FinalSet<java.lang.String> stopWords
public static java.util.Map<java.lang.String,java.lang.String> usStates
public static java.util.Map<java.lang.String,java.lang.String> languageCodes
public static java.util.Map<java.lang.String,java.lang.String> nationality2country
Method Detail |
---|
public static java.lang.String mul(java.lang.String s)
public static java.lang.String mulHyp(java.lang.String s)
public static java.lang.String opt(java.lang.String s)
public static java.lang.String optMul(java.lang.String s)
public static java.lang.String or(java.lang.String s1, java.lang.String s2)
public static java.lang.String c(java.lang.String s)
public static boolean isFamilyNamePrefix(java.lang.String s)
public static boolean isAttributePrefix(java.lang.String s)
public static boolean isPersonNameSuffix(java.lang.String s)
public static boolean isTitle(java.lang.String s)
public static boolean isCompanyNameSuffix(java.lang.String s)
public static boolean isName(java.lang.String s)
public static boolean isNames(java.lang.String s)
public static boolean couldBeName(java.lang.String s)
public java.lang.String toString()
Overrides: toString in class java.lang.Object
public java.lang.String normalize()
public java.lang.String describe()
public java.lang.String original()
public static boolean isAbbreviation(java.lang.String word)
public static boolean couldBeAbbreviation(java.lang.String word)
public static boolean isCompanyName(java.lang.String s)
public static boolean couldBeCompanyName(java.lang.String s)
public static boolean couldBePersonName(java.lang.String s)
public static boolean isPersonName(java.lang.String m)
public static boolean isStopWord(java.lang.String w)
public static boolean isUSState(java.lang.String s)
public static boolean isUSStateAbbreviation(java.lang.String s)
public static java.lang.String unabbreviateUSState(java.lang.String s)
public static boolean isLanguage(java.lang.String s)
public static boolean isLanguageCode(java.lang.String s)
public static java.lang.String languageForCode(java.lang.String s)
public static boolean isNation(java.lang.String s)
public static boolean isNationality(java.lang.String s)
public static java.lang.String nationForNationality(java.lang.String s)
public static Name of(java.lang.String s)
public static void main(java.lang.String[] argv) throws java.lang.Exception
Throws: java.lang.Exception
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |