javatools.parsers
Class DateParser

java.lang.Object
  extended by javatools.parsers.DateParser

public class DateParser
extends java.lang.Object

This class is part of the Java Tools (see http://mpii.de/yago-naga/javatools). It is licensed under the Creative Commons Attribution License (see http://creativecommons.org/licenses/by/3.0) by the YAGO-NAGA team (see http://mpii.de/yago-naga). The DateParser normalizes date expressions in English natural-language text to ISO dates of the form

year-month-day

where year is either positive or negative. The DateParser understands expressions like "4th century BC" or "3rd of November 2004". Dates may be underspecified: The character '#' stands for "at least one digit".
Example:
 DateParser.normalize("It was November 23rd to 24th 1998.")
          --> "It was 1998-11-23 to 1998-11-24."
          DateParser.getDate("It was 1998-11-23 to 1998-11-24.")
          -->  1998, 11, 23
          NumberFormatter.ISOtime(DateParser.asCalendar(DateParser.normalize("November 24th 1998")))
          --> 1998-11-24 T 00:00:00.00
 
TODO: "1st century" is transformed to 0##-##-##, but it should be 00##-##-##. Simply padding doesn't work, because a millennium would then be padded too much (we need to distinguish between centuries and millennia).


Field Summary
static java.lang.String DATE
           
static java.util.regex.Pattern DATEPATTERN
           
static java.util.regex.Pattern JUSTDATEPATTERN
           
static java.lang.String SDATE
           
static java.util.regex.Pattern SDATEPATTERN
           
static java.util.regex.Pattern SIMPLEYEARPATTERN
          A year as a pattern
static java.util.regex.Pattern YEARPATTERN
           
 
Constructor Summary
DateParser()
           
 
Method Summary
static java.util.Calendar asCalendar(int[] date)
          Converts a normalized Date to a Calendar
static java.util.Calendar asCalendar(java.lang.String date)
           
static java.util.Calendar asCalendar(java.lang.String[] date)
           
static int[] asInts(java.lang.String[] yearMonthDay)
          Parses the normalized date into ints, putting Integer.MAX_VALUE for '#'.
static boolean disjoint(int[] date1, int[] date2)
          TRUE if the dates are disjoint, i.e. none includes the other
static boolean disjoint(java.lang.String[] date1, java.lang.String[] date2)
           
static boolean disjoint(java.lang.String date1, java.lang.String date2)
           
static boolean equal(int[] date1, int[] date2)
          TRUE if the dates are exactly equal, including '#'
static boolean equal(java.lang.String[] date1, java.lang.String[] date2)
           
static boolean equal(java.lang.String date1, java.lang.String date2)
           
static java.util.Collection<Triple<java.lang.String,java.lang.Integer,java.lang.Integer>> getAllDatePositions(java.lang.CharSequence s)
          Returns all the date values and their positions in the text
static java.util.Collection<Triple<java.lang.String,java.lang.Integer,java.lang.Integer>> getAllDatePositions(java.lang.CharSequence s, Language language)
          Returns all the date values and their positions in the text
static java.util.Collection<java.lang.String> getAllDates(java.lang.CharSequence s)
          Normalizes all dates in a String
static java.util.Collection<java.lang.String> getAllDates(java.lang.CharSequence s, Language language)
          Normalizes all dates in a String
static java.lang.String[] getDate(java.lang.CharSequence d)
          Returns the components of the date (year, month, day) in a normalized date string (or null)
static java.lang.String[] getDate(java.lang.CharSequence d, int[] pos)
          Returns the components of the date (year, month, day) in a normalized date string (or null) and writes the start and end position in pos[0] and pos[1]
static java.util.List<java.lang.String> getDates(java.lang.CharSequence d)
          Returns the dates in a normalized date string
static boolean includes(int[] date1, int[] date2)
          TRUE if the first date includes the second, e.g., 1800-##-## includes 1800-05-##
static boolean includes(java.lang.String[] date1, java.lang.String[] date2)
           
static boolean includes(java.lang.String date1, java.lang.String date2)
           
static boolean isDate(java.lang.CharSequence s)
          Tells whether this string is a normalized date (and nothing else)
static boolean isEarlier(int[] date1, int[] date2)
          TRUE if the first date is earlier than the second.
static boolean isEarlier(java.lang.String[] date1, java.lang.String[] date2)
          TRUE if the first date is earlier than the second.
static boolean isEarlier(java.lang.String date1, java.lang.String date2)
           
static void main(java.lang.String[] argv)
          Test routine
static java.lang.String newDate(int y, int m, int d)
          Creates a date-string from a day, month and year as ints
static java.lang.String newDate(java.lang.String y, java.lang.String m, java.lang.String d)
          Creates a date-string of the form "year-month-day"
static java.lang.String newSubDate(java.lang.String y, java.lang.String m)
          Creates a date-string of the form "year-month-day"
static java.lang.String normalize(java.lang.CharSequence s)
          Normalizes all dates in a String
static java.lang.String normalize(java.lang.CharSequence s, Language language)
          Normalizes all dates in a String. Note: If you fix a bug in this version, please check whether the same fix must be applied to the position-change-tracking function below.
static java.lang.String normalize(java.lang.CharSequence s, Language language, PositionTracker posTracker)
          Normalizes all dates in a String, keeping track of the position changes with respect to the newly created string. Note: If you fix a bug in this version, please check whether the same fix must be applied to the non-tracking function.
static java.lang.String normalize(java.lang.CharSequence s, PositionTracker posTracker)
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

DATE

public static final java.lang.String DATE

DATEPATTERN

public static final java.util.regex.Pattern DATEPATTERN

JUSTDATEPATTERN

public static final java.util.regex.Pattern JUSTDATEPATTERN

SDATE

public static final java.lang.String SDATE

SDATEPATTERN

public static final java.util.regex.Pattern SDATEPATTERN

SIMPLEYEARPATTERN

public static final java.util.regex.Pattern SIMPLEYEARPATTERN
A year as a pattern


YEARPATTERN

public static final java.util.regex.Pattern YEARPATTERN
Constructor Detail

DateParser

public DateParser()
Method Detail

newDate

public static final java.lang.String newDate(java.lang.String y,
                                             java.lang.String m,
                                             java.lang.String d)
Creates a date-string of the form "year-month-day"


newDate

public static final java.lang.String newDate(int y,
                                             int m,
                                             int d)
Creates a date-string from a day, month and year as ints


newSubDate

public static final java.lang.String newSubDate(java.lang.String y,
                                                java.lang.String m)
Creates a date-string of the form "year-month-day"


normalize

public static java.lang.String normalize(java.lang.CharSequence s)
Normalizes all dates in a String


normalize

public static java.lang.String normalize(java.lang.CharSequence s,
                                         Language language)
Normalizes all dates in a String. Note: If you fix a bug in this version, please check whether the same fix must be applied to the position-change-tracking function below.


normalize

public static java.lang.String normalize(java.lang.CharSequence s,
                                         PositionTracker posTracker)

normalize

public static java.lang.String normalize(java.lang.CharSequence s,
                                         Language language,
                                         PositionTracker posTracker)
Normalizes all dates in a String, keeping track of the position changes with respect to the newly created string. Note: If you fix a bug in this version, please check whether the same fix must be applied to the non-tracking function.


getAllDates

public static java.util.Collection<java.lang.String> getAllDates(java.lang.CharSequence s)
Normalizes all dates in a String


getAllDates

public static java.util.Collection<java.lang.String> getAllDates(java.lang.CharSequence s,
                                                                 Language language)
Normalizes all dates in a String


getAllDatePositions

public static java.util.Collection<Triple<java.lang.String,java.lang.Integer,java.lang.Integer>> getAllDatePositions(java.lang.CharSequence s)
Returns all the date values and their positions in the text


getAllDatePositions

public static java.util.Collection<Triple<java.lang.String,java.lang.Integer,java.lang.Integer>> getAllDatePositions(java.lang.CharSequence s,
                                                                                                                     Language language)
Returns all the date values and their positions in the text


getDate

public static java.lang.String[] getDate(java.lang.CharSequence d,
                                         int[] pos)
Returns the components of the date (year, month, day) in a normalized date string (or null) and writes the start and end position in pos[0] and pos[1]


getDates

public static java.util.List<java.lang.String> getDates(java.lang.CharSequence d)
Returns the dates in a normalized date string


getDate

public static java.lang.String[] getDate(java.lang.CharSequence d)
Returns the components of the date (year, month, day) in a normalized date string (or null)


isDate

public static boolean isDate(java.lang.CharSequence s)
Tells whether this string is a normalized date (and nothing else)


asCalendar

public static java.util.Calendar asCalendar(int[] date)
Converts a normalized Date to a Calendar


asCalendar

public static java.util.Calendar asCalendar(java.lang.String[] date)

asCalendar

public static java.util.Calendar asCalendar(java.lang.String date)

asInts

public static int[] asInts(java.lang.String[] yearMonthDay)
Parses the normalized date into ints, putting Integer.MAX_VALUE for '#'. This loses partial information in the year! (e.g. 18## -> ####)


isEarlier

public static boolean isEarlier(int[] date1,
                                int[] date2)
TRUE if the first date is earlier than the second. This does not define a total order on dates, as, e.g., 1800-##-## is neither earlier nor later than 1800-05-##


isEarlier

public static boolean isEarlier(java.lang.String[] date1,
                                java.lang.String[] date2)
TRUE if the first date is earlier than the second. This does not define a total order on dates, as, e.g., 1800-##-## is neither earlier nor later than 1800-05-##


isEarlier

public static boolean isEarlier(java.lang.String date1,
                                java.lang.String date2)

includes

public static boolean includes(int[] date1,
                               int[] date2)
TRUE if the first date includes the second, e.g., 1800-##-## includes 1800-05-##


includes

public static boolean includes(java.lang.String[] date1,
                               java.lang.String[] date2)

includes

public static boolean includes(java.lang.String date1,
                               java.lang.String date2)

equal

public static boolean equal(int[] date1,
                            int[] date2)
TRUE if the dates are exactly equal, including '#'


equal

public static boolean equal(java.lang.String[] date1,
                            java.lang.String[] date2)

equal

public static boolean equal(java.lang.String date1,
                            java.lang.String date2)

disjoint

public static boolean disjoint(int[] date1,
                               int[] date2)
TRUE if the dates are disjoint, i.e. none includes the other


disjoint

public static boolean disjoint(java.lang.String[] date1,
                               java.lang.String[] date2)

disjoint

public static boolean disjoint(java.lang.String date1,
                               java.lang.String date2)

main

public static void main(java.lang.String[] argv)
                 throws java.lang.Exception
Test routine

Throws:
java.lang.Exception