javatools.parsers
Class DateParser1

java.lang.Object
  extended by javatools.parsers.DateParser1

public class DateParser1
extends java.lang.Object

This class is part of the Java Tools (see http://mpii.de/yago-naga/javatools). It is licensed under the Creative Commons Attribution License (see http://creativecommons.org/licenses/by/3.0) by the YAGO-NAGA team (see http://mpii.de/yago-naga). The DateParser normalizes date expressions in English natural language text to ISO dates of the form

year-month-day

where year is either positive or negative. The DateParser understands expressions like "4th century BC" or "3rd of November 2004". Dates may be underspecified: The character '#' stands for "at least one digit".
Example:
         DateParser.normalize("It was November 23rd to 24th 1998.")
         --> "It was 1998-11-23 to 1998-11-24."
         DateParser.getDate("It was 1998-11-23 to 1998-11-24.")
         -->  1998, 11, 23
         NumberFormatter.ISOtime(DateParser.getCalendar("November 24th 1998"))
         --> 1998-11-24 T 00:00:00.00
   


Field Summary
static java.lang.String B
          A blank as a RegEx
static java.lang.String DATE
           
static java.util.regex.Pattern DATEPATTERN
           
static java.lang.String NORMALIZEDATE
          A Date as a capturing RegEx
static java.util.regex.Pattern NORMALIZEDATEPATTERN
           
static java.util.regex.Pattern NOYEARPATTERN
           
static java.util.regex.Pattern REPLACEATEPATTERN
           
static java.lang.String REPLACEDATE
           
static java.lang.String SDATE
           
static java.util.regex.Pattern SDATEPATTERN
           
static java.util.regex.Pattern SIMPLEYEARPATTERN
          A year as a pattern
static java.util.regex.Pattern YEARPATTERN
           
 
Constructor Summary
DateParser1()
           
 
Method Summary
static java.util.Calendar asCalendar(int[] date)
          Converts a normalized Date to a Calendar
static java.util.Calendar asCalendar(java.lang.String date)
           
static java.util.Calendar asCalendar(java.lang.String[] date)
           
static int[] asInts(java.lang.String[] yearMonthDay)
          Parses the normalized date into ints, putting Integer.MAX_VALUE for '#'.
static boolean disjoint(int[] date1, int[] date2)
          TRUE if the dates are disjoint, i.e. neither includes the other
static boolean disjoint(java.lang.String[] date1, java.lang.String[] date2)
           
static boolean disjoint(java.lang.String date1, java.lang.String date2)
           
static boolean equal(int[] date1, int[] date2)
          TRUE if the dates are exactly equal, including '#'
static boolean equal(java.lang.String[] date1, java.lang.String[] date2)
           
static boolean equal(java.lang.String date1, java.lang.String date2)
           
static java.util.Collection<java.lang.String> getAllDates(java.lang.CharSequence s)
          Normalizes all dates in a String and returns these dates
static java.lang.String[] getDate(java.lang.CharSequence d)
          Returns the components of the date (year, month, day) in a normalized date string (or null)
static java.lang.String[] getDate(java.lang.CharSequence d, int[] pos)
          Returns the components of the date (year, month, day) in a normalized date string (or null) and writes the start and end position in pos[0] and pos[1]
static boolean includes(int[] date1, int[] date2)
          TRUE if the first date includes the second, e.g., 1800-##-## includes 1800-05-##
static boolean includes(java.lang.String[] date1, java.lang.String[] date2)
           
static boolean includes(java.lang.String date1, java.lang.String date2)
           
static boolean isDate(java.lang.CharSequence s)
          Tells whether this string is a normalized date (and nothing else)
static boolean isEarlier(int[] date1, int[] date2)
          TRUE if the first date is earlier than the second.
static boolean isEarlier(java.lang.String[] date1, java.lang.String[] date2)
          TRUE if the first date is earlier than the second.
static boolean isEarlier(java.lang.String date1, java.lang.String date2)
           
static void main(java.lang.String[] argv)
          Test routine
static java.lang.String newDate(int y, int m, int d)
          Creates a date-string from a year, month and day given as ints
static java.lang.String newDate(java.lang.String y, java.lang.String m, java.lang.String d)
          Creates a date-string of the form "year-month-day"
static java.lang.String newSubDate(java.lang.String y, java.lang.String m)
          Creates a date-string of the form "year-month-day"
static java.lang.String normalize(java.lang.CharSequence s)
          Normalizes all dates in a String
static java.lang.String normalizeIfContainsTemp(java.lang.CharSequence s)
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

NORMALIZEDATE

public static final java.lang.String NORMALIZEDATE
A Date as a capturing RegEx


NORMALIZEDATEPATTERN

public static final java.util.regex.Pattern NORMALIZEDATEPATTERN

DATE

public static final java.lang.String DATE

DATEPATTERN

public static final java.util.regex.Pattern DATEPATTERN

SDATE

public static final java.lang.String SDATE

SDATEPATTERN

public static final java.util.regex.Pattern SDATEPATTERN

REPLACEDATE

public static final java.lang.String REPLACEDATE

REPLACEATEPATTERN

public static final java.util.regex.Pattern REPLACEATEPATTERN

NOYEARPATTERN

public static final java.util.regex.Pattern NOYEARPATTERN

SIMPLEYEARPATTERN

public static final java.util.regex.Pattern SIMPLEYEARPATTERN
A year as a pattern


YEARPATTERN

public static final java.util.regex.Pattern YEARPATTERN

B

public static final java.lang.String B
A blank as a RegEx

See Also:
Constant Field Values
Constructor Detail

DateParser1

public DateParser1()
Method Detail

newDate

public static final java.lang.String newDate(java.lang.String y,
                                             java.lang.String m,
                                             java.lang.String d)
Creates a date-string of the form "year-month-day"


newSubDate

public static final java.lang.String newSubDate(java.lang.String y,
                                                java.lang.String m)
Creates a date-string of the form "year-month-day"


newDate

public static final java.lang.String newDate(int y,
                                             int m,
                                             int d)
Creates a date-string from a year, month and day given as ints


normalize

public static java.lang.String normalize(java.lang.CharSequence s)
Normalizes all dates in a String


getAllDates

public static java.util.Collection<java.lang.String> getAllDates(java.lang.CharSequence s)
Normalizes all dates in a String and returns these dates


normalizeIfContainsTemp

public static java.lang.String normalizeIfContainsTemp(java.lang.CharSequence s)

getDate

public static java.lang.String[] getDate(java.lang.CharSequence d,
                                         int[] pos)
Returns the components of the date (year, month, day) in a normalized date string (or null) and writes the start and end position in pos[0] and pos[1]


getDate

public static java.lang.String[] getDate(java.lang.CharSequence d)
Returns the components of the date (year, month, day) in a normalized date string (or null)


isDate

public static boolean isDate(java.lang.CharSequence s)
Tells whether this string is a normalized date (and nothing else)


asCalendar

public static java.util.Calendar asCalendar(int[] date)
Converts a normalized Date to a Calendar


asCalendar

public static java.util.Calendar asCalendar(java.lang.String[] date)

asCalendar

public static java.util.Calendar asCalendar(java.lang.String date)

asInts

public static int[] asInts(java.lang.String[] yearMonthDay)
Parses the normalized date into ints, putting Integer.MAX_VALUE for '#'. This loses partial information in the year!! (e.g. 18## -> ####)


isEarlier

public static boolean isEarlier(int[] date1,
                                int[] date2)
TRUE if the first date is earlier than the second. This does not define a total order on dates, as, e.g., 1800-##-## is neither earlier nor later than 1800-05-##


isEarlier

public static boolean isEarlier(java.lang.String[] date1,
                                java.lang.String[] date2)
TRUE if the first date is earlier than the second. This does not define a total order on dates, as, e.g., 1800-##-## is neither earlier nor later than 1800-05-##


isEarlier

public static boolean isEarlier(java.lang.String date1,
                                java.lang.String date2)

includes

public static boolean includes(int[] date1,
                               int[] date2)
TRUE if the first date includes the second, e.g., 1800-##-## includes 1800-05-##


includes

public static boolean includes(java.lang.String[] date1,
                               java.lang.String[] date2)

includes

public static boolean includes(java.lang.String date1,
                               java.lang.String date2)

equal

public static boolean equal(int[] date1,
                            int[] date2)
TRUE if the dates are exactly equal, including '#'


equal

public static boolean equal(java.lang.String[] date1,
                            java.lang.String[] date2)

equal

public static boolean equal(java.lang.String date1,
                            java.lang.String date2)

disjoint

public static boolean disjoint(int[] date1,
                               int[] date2)
TRUE if the dates are disjoint, i.e. neither includes the other


disjoint

public static boolean disjoint(java.lang.String[] date1,
                               java.lang.String[] date2)

disjoint

public static boolean disjoint(java.lang.String date1,
                               java.lang.String date2)

main

public static void main(java.lang.String[] argv)
                 throws java.lang.Exception
Test routine

Throws:
java.lang.Exception