": 0, "T": 1, "h": 2, "e": 3, " ": 4, "U": 5, ".": 6, "S": 7, "C": 8, "n": 9, "t": 10, "r": 11, "s": 12, "f": 13, "o": 14, "D": 15, "i": 16, "a": 17, "l": 18, "d": 19, "P": 20, "v": 21, "y": 22, "c": 23, "m": 24, "u": 25, "b": 26, "k": 27, ",": 28, "g": 29, "p": 30, "w": 31, "\n": 32, "W": 33, "M": 34, "z": 35, "-": 36, "2": 37, "0": 38, "9": 39, "N": 40, "\"": 41, ":": 42, "O": 43, "&": 44, "Y": 45, "E": 46, "'": 47, "!": 48, "F": 49, "8": 50, "6": 51, "3": 52, "7": 53, "(": 54, "5": 55, "/": 56, ")": 57, "4": 58, "A": 59, "?": 60, "R": 61, "K": 62, "J": 63, "B": 64, "1": 65, "L": 66, "j": 67, "I": 68, "V": 69, "H": 70, "G": 71, ";": 72, "x": 73, "X": 74, "q": 75, "\u2022": 76, "$": 77, "Q": 78, "Z": 79, "\u00a3": 80, "%": 81, "~": 82, "=": 83, "\u02bc": 84, "\u00b7": 85, "@": 86, "\u00e1": 87, "\u00eb": 88, "\u00b0": 89, "+": 90, "\u00f1": 91, "[": 92, "]": 93, "\u00bb": 94, "#": 95, "_": 96, "\u00e9": 97, "|": 98, "\u00bc": 99, "\u00ed": 100, "\u00fc": 101, "\ufffd": 102, "\u00f6": 103, "\u00bf": 104, "\u20ac": 105, "*": 106, "\u00e7": 107, "\u00e8": 108, ">": 109, "\u00a9": 110, "\u00e0": 111, "\u00bd": 112, "\u00f3": 113, "\u00ae": 114, "\u00c1": 115, "\u00f4": 116, "\u2011": 117, "\u00c2": 118, "\u00e4": 119, "\u00ea": 120, "\u00b4": 121, "<": 122, "\u00c3": 123, "\u201a": 124, "\u00c9": 125, "\u00e2": 126, "\u00cd": 127, "\u00b6": 128, "\u00ab": 129, "\u0095": 130, "\u00e3": 131, "^": 132, "\u00a2": 133, "\u00ef": 134, "\u00ee": 135, "\u201f": 136, "\u00f9": 137, "\u00ba": 138, "\u00fa": 139, "\u2665": 140, "\u00be": 141, "\u25cf": 142, "\u00d7": 143, "\u00fe": 144, "\u0161": 145, "{": 146, "}": 147, "\u00f8": 148, "\ufb01": 149, "\u0175": 150, "\u00dc": 151, "\u00d3": 152, "\uf06e": 153, "\u00c0": 154, "\u00ac": 155, "\u00a4": 156, "\u00f5": 157, "\u0160": 158, "\u00e5": 159, "\u00b2": 160, "\u0410": 161, "\u02da": 162, "\\": 163, "\ue00d": 164, "\u00c5": 165, "\u2020": 166, "\u00b5": 167, "\u2032": 168, "\u017e": 169, "\u0107": 170, "\u0115": 171, 
"\u20a4": 172, "\u00d6": 173, "\u00c7": 174, "\uffe1": 175, "\u2206": 176, "\u00a1": 177, "\u00a5": 178, "\u00d8": 179, "\u0153": 180, "\u00e6": 181, "\u2463": 182, "\uff08": 183, "\uff09": 184, "\u2010": 185, "\u011b": 186, "\u200e": 187, "\u0105": 188, "\udb86\udfe0": 189, "\u015b": 190, "\u266b": 191, "\u0177": 192, "\uf0a7": 193, "\u00f2": 194, "\ufb00": 195, "\u2500": 196, "\u009c": 197, "\u0080": 198, "\u0083": 199, "\u00d1": 200, "\u02c7": 201, "\u00fb": 202, "\u00b1": 203, "\u0141": 204, "\u1ea3": 205, "\u010d": 206, "\uf095": 207, "\uf020": 208, "\u2212": 209, "\u2122": 210, "\u00ff": 211, "\u25a0": 212, "\u2015": 213, "\u00f0": 214, "\u017d": 215, "\u00b9": 216, "\u00de": 217, "\u017c": 218, "\u00b3": 219, "\u0451": 220, "\u0092": 221, "\u00ce": 222, "\u00a7": 223, "\uf02d": 224, "\u00df": 225, "\u0142": 226}
\ No newline at end of file
diff --git a/timelines/tweaked-timenorm/src/main/scala/org/clulab/time/Temporal.scala b/timelines/tweaked-timenorm/src/main/scala/org/clulab/time/Temporal.scala
deleted file mode 100755
index 8b4f923..0000000
--- a/timelines/tweaked-timenorm/src/main/scala/org/clulab/time/Temporal.scala
+++ /dev/null
@@ -1,338 +0,0 @@
-package org.clulab.time
-
-import java.time.temporal.TemporalField
-import java.time.temporal.TemporalUnit
-import java.time.temporal.ChronoField._
-import java.time.temporal.ChronoUnit._
-import java.time.temporal.ValueRange
-import java.time.temporal.TemporalAccessor
-import java.time.temporal.Temporal
-import java.time.Duration
-import java.time.LocalDate
-import java.time.MonthDay
-import java.time.temporal.WeekFields
-import java.util.Locale
-
-
-abstract class PartialRange(name: String, val field: TemporalField)
-extends TemporalUnit {
- def first(temporal: TemporalAccessor): Long
- def last(temporal: TemporalAccessor): Long
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange
- def addToSize(dateTime: Temporal, periodToAdd: Long): Long
- def range: ValueRange
- def getDuration: Duration
- def isDurationEstimated: Boolean
-
- def isDateBased: Boolean = this.field.isDateBased
- def isTimeBased: Boolean = this.field.isTimeBased
- override def isSupportedBy(temporal: Temporal): Boolean = this.field.isSupportedBy(temporal)
- def addTo[R <: Temporal](temporal: R, amount: Long): R = {
- val size = this.addToSize(temporal, amount)
- this.field.getBaseUnit().addTo(temporal, amount * size)
- }
-
- def between(temporal1Inclusive: Temporal, temporal2Exclusive: Temporal): Long = ???
-
- protected def size(first: Long, last: Long, rangeMinimum: Long, rangeMaximum: Long): Long = {
- if (first < last) {
- last - first + 1L
- } else {
- val firstToMax = rangeMaximum - first + 1L
- val minToLast = last - rangeMinimum + 1L
- firstToMax + minToLast
- }
- }
-}
-
-abstract class ConstantPartialRange(
- name: String,
- field: TemporalField,
- val first: Long,
- val last: Long) extends PartialRange(name, field) {
- private val fixedSize = {
- this.size(first, last, this.field.range().getMinimum(), this.field.range().getMaximum())
- }
- def first(temporal: TemporalAccessor): Long = this.first
- def last(temporal: TemporalAccessor): Long = this.last
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def addToSize(dateTime: Temporal, periodToAdd: Long): Long = this.fixedSize
- private val rangeMin = this.field.range().getMinimum()
- val range: ValueRange = ValueRange.of(this.rangeMin, this.rangeMin + this.fixedSize - 1)
- val getDuration: Duration = Duration.of(this.fixedSize, this.field.getBaseUnit())
- val isDurationEstimated: Boolean = this.field.getBaseUnit().isDurationEstimated()
-}
-
-abstract class MonthDayPartialRange(
- name: String,
- val first: MonthDay,
- val last: MonthDay) extends PartialRange(name, DAY_OF_YEAR) {
- def first(temporal: TemporalAccessor): Long = {
- this.first.atYear(YEAR.checkValidIntValue(YEAR.getFrom(temporal))).get(DAY_OF_YEAR)
- }
- def last(temporal: TemporalAccessor): Long = {
- this.last.atYear(YEAR.checkValidIntValue(YEAR.getFrom(temporal))).get(DAY_OF_YEAR)
- }
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def addToSize(dateTime: Temporal, periodToAdd: Long): Long = {
- val first = this.first(dateTime)
- val last = this.last(dateTime)
- // partial range does not stretch across range boundaries
- if (first < last) {
- val range = this.field.rangeRefinedBy(dateTime)
- this.size(first, last, range.getMinimum(), range.getMaximum())
- }
- // partial range stretches across two ranges; look at the first partial range
- else if (periodToAdd < 0) {
- val prevDateTime = this.field.getRangeUnit().addTo(dateTime, -1L)
- val prevFirst = this.first(prevDateTime)
- val max = this.field.rangeRefinedBy(prevDateTime).getMaximum()
- val min = this.field.rangeRefinedBy(dateTime).getMinimum()
- this.size(prevFirst, last, min, max)
- }
- // partial range stretches across two ranges; look at the second partial range
- else {
- val nextDateTime = this.field.getRangeUnit().addTo(dateTime, 1L)
- val nextLast = this.last(nextDateTime)
- val max = this.field.rangeRefinedBy(dateTime).getMaximum()
- val min = this.field.rangeRefinedBy(nextDateTime).getMinimum()
- this.size(first, nextLast, min, max)
- }
- }
- private val sizes = for (year <- Set(1999, 2000)) yield {
- this.addToSize(LocalDate.of(year, 1, 1), +1L)
- }
- val range: ValueRange = ValueRange.of(1, this.sizes.min, this.sizes.max)
- val getDuration: Duration = Duration.of(this.sizes.min, DAYS)
- val isDurationEstimated: Boolean = true
-}
-
-class BaseUnitOfPartial(name: String, partialRange: PartialRange)
-extends TemporalField {
- override def getDisplayName(locale: Locale): String = this.name
- def getBaseUnit: TemporalUnit = this.partialRange.field.getBaseUnit()
- def getRangeUnit: TemporalUnit = this.partialRange
- def isDateBased: Boolean = this.getBaseUnit.isDateBased && this.getRangeUnit.isDateBased
- def isTimeBased: Boolean = this.getBaseUnit.isTimeBased && this.getRangeUnit.isTimeBased
- def range: ValueRange = this.partialRange.range
- def getFrom(temporal: TemporalAccessor): Long = {
- val baseValue = this.partialRange.field.getFrom(temporal)
- val first = this.partialRange.first(temporal)
- if (baseValue >= first) {
- this.partialRange.rangeRefinedBy(temporal).getMinimum() + baseValue - first
- } else {
- val maxValue = this.partialRange.field.rangeRefinedBy(temporal).getMaximum()
- maxValue - first + 1 + baseValue
- }
- }
- def isSupportedBy(temporal: TemporalAccessor): Boolean = HOUR_OF_DAY.isSupportedBy(temporal)
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.partialRange.rangeRefinedBy(temporal)
- def adjustInto[R <: Temporal](temporal: R, newValue: Long): R = {
- val range = this.partialRange.field.rangeRefinedBy(temporal)
- val rangeMin = range.getMinimum()
- val rangeMax = range.getMaximum()
- val first = this.partialRange.first(temporal)
- val value = first + newValue - rangeMin
- val adjustedValue = if (value <= rangeMax) value else value - rangeMax
- this.partialRange.field.adjustInto(temporal, adjustedValue)
- }
-}
-
-class PartialOfRangeUnit(name: String, partialRange: PartialRange)
-extends TemporalField {
- def getBaseUnit: TemporalUnit = this.partialRange
- def getRangeUnit: TemporalUnit = this.partialRange.field.getRangeUnit()
- def isDateBased: Boolean = this.getBaseUnit.isDateBased && this.getRangeUnit.isDateBased
- def isTimeBased: Boolean = this.getBaseUnit.isTimeBased && this.getRangeUnit.isTimeBased
- def range: ValueRange = ValueRange.of(0, 1)
- def getFrom(temporal: TemporalAccessor): Long = {
- if (this.contains(temporal)) 1L else 0L
- }
- def isSupportedBy(temporal: TemporalAccessor): Boolean = HOUR_OF_DAY.isSupportedBy(temporal)
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def adjustInto[R <: Temporal](temporal: R, newValue: Long): R = newValue match {
- case 1 =>
- if (this.contains(temporal)) temporal
- else this.partialRange.field.adjustInto(temporal, this.partialRange.first(temporal))
- }
- def contains(temporal: TemporalAccessor): Boolean = {
- val first = this.partialRange.first(temporal)
- val last = this.partialRange.last(temporal)
- val value = this.partialRange.field.getFrom(temporal)
- if (first < last) first <= value && value <= last
- else first <= value || value <= last
- }
-}
-
-object MORNINGS extends ConstantPartialRange("Mornings", HOUR_OF_DAY, 0L, 11L)
-object MORNING_OF_DAY extends PartialOfRangeUnit("MorningOfDay", MORNINGS)
-object HOUR_OF_MORNING extends BaseUnitOfPartial("HourOfMorning", MORNINGS)
-
-object AFTERNOONS extends ConstantPartialRange("Afternoons", HOUR_OF_DAY, 12L, 17L)
-object AFTERNOON_OF_DAY extends PartialOfRangeUnit("AfternoonOfDay", AFTERNOONS)
-object HOUR_OF_AFTERNOON extends BaseUnitOfPartial("HourOfAfternoon", AFTERNOONS)
-
-object EVENINGS extends ConstantPartialRange("Evenings", HOUR_OF_DAY, 17L, 23L)
-object EVENING_OF_DAY extends PartialOfRangeUnit("EveningOfDay", EVENINGS)
-object HOUR_OF_EVENING extends BaseUnitOfPartial("HourOfEvening", EVENINGS)
-
-object NIGHTS extends ConstantPartialRange("Nights", HOUR_OF_DAY, 21L, 3L)
-object NIGHT_OF_DAY extends PartialOfRangeUnit("NightOfDay", NIGHTS)
-object HOUR_OF_NIGHT extends BaseUnitOfPartial("HourOfNight", NIGHTS)
-
-object WEEKENDS extends ConstantPartialRange("Weekends", DAY_OF_WEEK, 6L, 7L)
-object WEEKEND_OF_WEEK extends PartialOfRangeUnit("WeekendOfWeek", WEEKENDS)
-object DAY_OF_WEEKEND extends BaseUnitOfPartial("DayOfWeekend", WEEKENDS)
-
-object SPRINGS extends MonthDayPartialRange(
- "Springs", MonthDay.of(3, 20), MonthDay.of(6, 20))
-object SPRING_OF_YEAR extends PartialOfRangeUnit("SpringOfYear", SPRINGS)
-object DAY_OF_SPRING extends BaseUnitOfPartial("DayOfSpring", SPRINGS)
-
-object SUMMERS extends MonthDayPartialRange(
- "Summers", MonthDay.of(6, 21), MonthDay.of(9, 21))
-object SUMMER_OF_YEAR extends PartialOfRangeUnit("SummerOfYear", SUMMERS)
-object DAY_OF_SUMMER extends BaseUnitOfPartial("DayOfSummer", SUMMERS)
-
-object FALLS extends MonthDayPartialRange(
- "Falls", MonthDay.of(9, 22), MonthDay.of(12, 20))
-object FALL_OF_YEAR extends PartialOfRangeUnit("FallOfYear", FALLS)
-object DAY_OF_FALL extends BaseUnitOfPartial("DayOfFall", FALLS)
-
-object WINTERS extends MonthDayPartialRange(
- "Winters", MonthDay.of(12, 21), MonthDay.of(3, 19))
-object WINTER_OF_YEAR extends PartialOfRangeUnit("WinterOfYear", WINTERS)
-object DAY_OF_WINTER extends BaseUnitOfPartial("DayOfWinter", WINTERS)
-
-
-object EASTER_DAY_OF_YEAR extends TemporalField {
- def getBaseUnit: TemporalUnit = DAYS
- def getRangeUnit: TemporalUnit = YEARS
- def isDateBased: Boolean = true
- def isTimeBased: Boolean = false
- def range: ValueRange = ValueRange.of(0, 1)
- def getFrom(temporal: TemporalAccessor): Long = {
- val (_, _, isEaster) = this.getFromEasterMonthDayIsEaster(temporal)
- if (isEaster) 1 else 0
- }
- def isSupportedBy(temporal: TemporalAccessor): Boolean = DAY_OF_WEEK.isSupportedBy(temporal)
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def adjustInto[R <: Temporal](temporal: R, newValue: Long): R = {
- val (easterMonth, easterDay, isEaster) = this.getFromEasterMonthDayIsEaster(temporal)
- newValue match {
- case 0 => if (isEaster) DAYS.addTo(temporal, 1) else temporal
- case 1 => DAY_OF_MONTH.adjustInto(MONTH_OF_YEAR.adjustInto(temporal, easterMonth), easterDay)
- }
- }
-
- private def getFromEasterMonthDayIsEaster(temporal: TemporalAccessor): (Int, Int, Boolean) = {
- val year = YEAR.checkValidIntValue(YEAR.getFrom(temporal))
- // from http://aa.usno.navy.mil/faq/docs/easter.php
- val century = year / 100
- val n = year - 19 * ( year / 19 )
- val k = ( century - 17 ) / 25
- var i = century - century / 4 - ( century - k ) / 3 + 19 * n + 15
- i = i - 30 * ( i / 30 )
- i = i - ( i / 28 ) * ( 1 - ( i / 28 ) * ( 29 / ( i + 1 ) )
- * ( ( 21 - n ) / 11 ) )
- var j = year + year / 4 + i + 2 - century + century / 4
- j = j - 7 * ( j / 7 )
- val l = i - j
- val month = 3 + ( l + 40 ) / 44
- val day = l + 28 - 31 * ( month / 4 )
- (month, day, MONTH_OF_YEAR.getFrom(temporal) == month && DAY_OF_MONTH.getFrom(temporal) == day)
- }
-}
-
-object ISO_WEEK {
- val OF_YEAR = WeekFields.ISO.weekOfYear()
-}
-
-object DECADE extends TemporalField {
- def getBaseUnit: TemporalUnit = DECADES
- def getRangeUnit: TemporalUnit = DECADES
- def isDateBased: Boolean = true
- def isTimeBased: Boolean = false
- def range: ValueRange = ValueRange.of(-999, +999)
- def getFrom(temporal: TemporalAccessor): Long = YEAR.getFrom(temporal) / 10
- def isSupportedBy(temporal: TemporalAccessor): Boolean = YEAR.isSupportedBy(temporal)
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def adjustInto[R <: Temporal](temporal: R, newValue: Long): R = YEAR.adjustInto(temporal, newValue * 10)
-}
-
-object YEAR_OF_DECADE extends TemporalField {
- def getBaseUnit: TemporalUnit = YEARS
- def getRangeUnit: TemporalUnit = DECADES
- def isDateBased: Boolean = true
- def isTimeBased: Boolean = false
- def range: ValueRange = ValueRange.of(0, 9)
- def getFrom(temporal: TemporalAccessor): Long = YEAR.getFrom(temporal) % 10
- def isSupportedBy(temporal: TemporalAccessor): Boolean = YEAR.isSupportedBy(temporal)
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def adjustInto[R <: Temporal](temporal: R, newValue: Long): R = {
- val oldYear = YEAR.getFrom(temporal)
- YEAR.adjustInto(temporal, oldYear - oldYear % 10L + newValue)
- }
-}
-
-object CENTURY extends TemporalField {
- def getBaseUnit: TemporalUnit = CENTURIES
- def getRangeUnit: TemporalUnit = CENTURIES
- def isDateBased: Boolean = true
- def isTimeBased: Boolean = false
- def range: ValueRange = ValueRange.of(-99, +99)
- def getFrom(temporal: TemporalAccessor): Long = YEAR.getFrom(temporal) / 100
- def isSupportedBy(temporal: TemporalAccessor): Boolean = YEAR.isSupportedBy(temporal)
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def adjustInto[R <: Temporal](temporal: R, newValue: Long): R = YEAR.adjustInto(temporal, newValue * 100)
-}
-
-object DECADE_OF_CENTURY extends TemporalField {
- def getBaseUnit: TemporalUnit = DECADES
- def getRangeUnit: TemporalUnit = CENTURIES
- def isDateBased: Boolean = true
- def isTimeBased: Boolean = false
- def range: ValueRange = ValueRange.of(0, 9)
- def getFrom(temporal: TemporalAccessor): Long = DECADE.getFrom(temporal) % 10L
- def isSupportedBy(temporal: TemporalAccessor): Boolean = DECADE.isSupportedBy(temporal)
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def adjustInto[R <: Temporal](temporal: R, newValue: Long): R = {
- val oldDecade = DECADE.getFrom(temporal)
- DECADE.adjustInto(temporal, oldDecade - oldDecade % 10L + newValue)
- }
-}
-
-object QUARTER_CENTURIES extends TemporalUnit {
- override def isDateBased: Boolean = true
- override def isTimeBased: Boolean = false
- override def getDuration: Duration = YEARS.getDuration.multipliedBy(25L)
- override def isDurationEstimated: Boolean = YEARS.isDurationEstimated
- override def addTo[R <: Temporal](temporal: R, amount: Long): R = YEARS.addTo(temporal, amount * 25)
- override def between(temporal1Inclusive: Temporal, temporal2Exclusive: Temporal): Long =
- YEARS.between(temporal1Inclusive, temporal2Exclusive) / 25
-}
-
-object YEAR_OF_CENTURY extends TemporalField {
- def getBaseUnit: TemporalUnit = YEARS
- def getRangeUnit: TemporalUnit = CENTURIES
- def isDateBased: Boolean = true
- def isTimeBased: Boolean = false
- def range: ValueRange = ValueRange.of(0, 99)
- def getFrom(temporal: TemporalAccessor): Long = YEAR.getFrom(temporal) % 100
- def isSupportedBy(temporal: TemporalAccessor): Boolean = YEAR.isSupportedBy(temporal)
- def rangeRefinedBy(temporal: TemporalAccessor): ValueRange = this.range
- def adjustInto[R <: Temporal](temporal: R, newValue: Long): R = {
- val oldYear = YEAR.getFrom(temporal)
- YEAR.adjustInto(temporal, oldYear - oldYear % 100L + newValue)
- }
-}
-
-object UNSPECIFIED extends TemporalUnit {
- def isDateBased: Boolean = true
- def isTimeBased: Boolean = false
- def getDuration: Duration = FOREVER.getDuration()
- def isDurationEstimated: Boolean = true
- override def isSupportedBy(temporal: Temporal): Boolean = false
- def addTo[R <: Temporal](temporal: R, amount: Long): R = ???
- def between(temporal1Inclusive: Temporal, temporal2Exclusive: Temporal): Long = ???
-}
diff --git a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/Evaluator.scala b/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/Evaluator.scala
deleted file mode 100755
index 3dbac44..0000000
--- a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/Evaluator.scala
+++ /dev/null
@@ -1,157 +0,0 @@
-package org.clulab.timenorm.scfg
-
-import scala.io.{Codec, Source}
-import scala.util.{Failure, Success}
-import java.io.File
-import java.io.PrintWriter
-
-object Evaluator {
- /**
- This program normalizes timexes and compares the results to their gold
- standard normalizations
- */
-
- def main(lang: String, inFile: String, outFile: String): Unit = {
- /**
- Enter the language ("es"/"en") and the input and output paths
-
- Both input and output files are .tsv files with timexes in the 1st column,
- gold normalization value in the 2nd column and, in the output file, system
- normalization value in the 3rd column. Timexes from different documents must
- be separated by newlines, being DCTs the first timexes from each document
- */
-
- // Obtain the data of timexes and gold values from the input file
- val (timexList, goldList) = getContent(inFile)
- // Obtain the normalizations of the timexes.
- val normList = getNormalizations(lang, timexList)
- // Compare gold and system normalizations, write the results and get the
- //sums of timexes and correct normalizations
- val (sumGold, sumNorm) = compareAndWrite(outFile, timexList, goldList, normList)
-
- // Compute number of errors and accuracy
- val sumErrors = sumGold - sumNorm
- val accuracy = sumNorm.toFloat * 100 / sumGold
-
- // Print the final statistics
- println(f"""\n
- |Number of timexes (also DCTs): $sumGold%6d
- |Correct normalizations: $sumNorm%6d
- |Incorrect normalizations: $sumErrors%6d
- |Accuracy: $accuracy%6.2f\n""".stripMargin)
- }
-
- def getContent(inFile: String): (List[String], List[String]) = {
- /** Obtains the content from the input file as timex and value lists */
-
- val source = Source.fromFile(inFile)(Codec.UTF8)
- val lines = try {
- source.getLines.toList
- }
- finally {
- source.close()
- }
- val content = lines.map { line =>
- val split = line.split('\t')
- (split.lift(0).getOrElse(""), split.lift(2).getOrElse(""))
- }.unzip
- println(content)
- content
- }
-
- def getNormalizations(lang: String, timexList: List[String]): List[String] = {
- /** Processes the data, sends timexes and DCTs to the normalizer and returns
- the list with all the normalizations */
-
- // Select the parser for the desired grammar depending on the language
- val parser = lang match {
- case "es" => TemporalExpressionParser.es()
- case "en" => TemporalExpressionParser.en()
- case "it" => TemporalExpressionParser.it()
- }
-
- var dctTimeSpanOpt: Option[TimeSpan] = None
- val normList = timexList.map { timex =>
- // If this is a timex (is not a doc separator):
- if (timex.nonEmpty) {
- println(timex)
- // If this is the first timex in a doc, consider it a DCT
- if (dctTimeSpanOpt.isEmpty)
- dctTimeSpanOpt = Some(mkTimeSpan(timex))
- // Normalize the timex and append the normalization
- normalize(parser, timex, dctTimeSpanOpt.get)
- }
- // If this is a doc separator, empty the DCT timex and append ""
- else {
- dctTimeSpanOpt = None
- ""
- }
- }
-
- normList
- }
-
- def mkTimeSpan(timex: String): TimeSpan = {
- val timeSpan = timex.replace('T', '-').replace(':', '-').split('-').map(_.toInt) match {
- case Array(year, month, day) => TimeSpan.of(year, month, day)
- case Array(year, month, day, hour, minute, second) => TimeSpan.of(year, month, day, hour, minute, second)
- case Array(year, month, day, hour, minute) => TimeSpan.of(year, month, day, hour, minute, 0)
- }
-
- timeSpan
- }
-
- def normalize(parser: TemporalExpressionParser, timex: String, dctTimeSpan: TimeSpan): String = {
- /** Normalizes a timex according to the parser and the DCT TimeSpan.
- DCTs are normalized with respect to themselves */
-
- // Parse the timex with respect to its anchor
- parser.parse(timex, dctTimeSpan) match {
- // If the parser fails, return an empty string as normalization
- case Failure(_) => "-"
- // If the parser successes, return the normalization of the timex
- case Success(temporal) => temporal.timeMLValue
- }
- }
-
- def compareAndWrite(outFile: String, timexList: List[String],
- goldList: List[String], normList: List[String]): (Int, Int) = {
- // Create the output writer
- val printWriter = new PrintWriter(new File(outFile), Codec.UTF8.toString)
- try {
- compareAndWrite(printWriter, timexList, goldList, normList)
- }
- finally {
- printWriter.close()
- }
- }
-
- def compareAndWrite(printWriter: PrintWriter, timexList: List[String],
- goldList: List[String], normList: List[String]): (Int, Int) = {
- /** Writes the results to the printWriter, in "{timex}\t{gold}\t{norm}"
- format, and counts the number of timexes and correct normalizations */
-
- def next(current: Int, condition: Boolean): Int = if (condition) current + 1 else current
-
- // Iterate over timex list and get each timex, gold and norm set
- val goldAndNormCounters = (timexList, goldList, normList).zipped.foldLeft(0, 0) { case ((goldCounter, normCounter), (timex, gold, norm)) =>
- println(s"$timex\t$gold\t$norm")
- val (isGold, isNorm) = if (timex.nonEmpty) {
- // If this is a timex, write the data
- printWriter.println(s"$timex\t$gold\t$norm")
- // If this is a timex, sum a gold value
- // If timex exists in corpus and normalization is equal to gold,
- // sum a correct norm value
- (true, gold != "-" && norm == gold)
- }
- else {
- // If this is a doc separator, write a newline
- printWriter.println()
- (false, false)
- }
-
- (next(goldCounter, isGold), next(normCounter, isNorm))
- }
- goldAndNormCounters
- }
-}
diff --git a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/SynchronousGrammar.scala b/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/SynchronousGrammar.scala
deleted file mode 100755
index 0e84992..0000000
--- a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/SynchronousGrammar.scala
+++ /dev/null
@@ -1,252 +0,0 @@
-package org.clulab.timenorm.scfg
-
-import scala.collection.immutable.{IndexedSeq, Seq}
-
-/**
- * A set of root symbols and synchronous rules that define a synchronous grammar.
- *
- * Non-terminal symbols should be alphanumeric characters enclosed in square brackets, for example,
- * `[Period]`, `[TimeSpan]` or `[Int]`.
- *
- * A single colon can be used to "subtype" a non-terminal, e.g. `[Int:4Digit]` or `[Unit:Singular]`.
- * In such cases, the symbol without the subtype information is called the "basic symbol", e.g.
- * `[Int]` and `[Unit]` are the basic symbols for the preceding two examples.
- *
- * A symbol of the form `[Int:X-Y]`, where X and Y are integers, is treated as a numeric range
- * specification (inclusive of both endpoints). [[SynchronousParser]] has special handling of
- * such ranges.
- *
- * A non-terminal whose basic symbol is `[Nil]` is called a "nil symbol". [[SynchronousParser]] has
- * special handling of such nil symbols.
- *
- * @constructor Create a new grammar from a set of root symbols and a set of synchronous rules.
- * @param rootSymbols The symbols that are allowed to be the root of a parse.
- * @param rules The synchronous rules.
- */
-class SynchronousGrammar(val rootSymbols: Set[String], val rules: Seq[SynchronousGrammar.Rule]) {
-
- private val rulePrefixMap = PrefixMultiMap.empty[String, SynchronousGrammar.Rule]
- for (rule <- rules) {
- this.rulePrefixMap += (rule.sourceSeq, rule)
- }
-
- private val numberRegex = "^\\[Int:(\\d*)-(\\d*)\\]$".r
- private val numberRanges: Set[Range.Inclusive] = (
- for {
- rule <- rules
- numberRegex(begin, end) <- rule.symbol +: rule.sourceSeq
- } yield {
- begin.toInt to end.toInt
- }).toSet
-
- /**
- * Gets all non-terminal symbols whose range allows a particular number.
- *
- * @param number The number whose possible non-terminal symbols are to be found.
- * @return Each non-terminal symbol whose range allows the number.
- */
- def sourceSymbolsForNumber(number: Int): Set[String] = {
- val symbolsWithRanges =
- for (range <- this.numberRanges; if range.contains(number))
- yield "[Int:%d-%d]".format(range.start, range.end)
- symbolsWithRanges + "[Int]"
- }
-
- /**
- * Gets all rules whose source side starts with a token sequence.
- *
- * @param tokens The sequence of source tokens.
- * @return All rules whose source side starts with the given tokens.
- */
- def sourceSeqStartsWith(tokens: Seq[String]): Set[SynchronousGrammar.Rule] = {
- this.rulePrefixMap.getAllWithPrefix(tokens)
- }
-
- /**
- * Gets all rules whose source side starts with a token.
- *
- * @param token The source token.
- * @return All rules whose source side starts with the given token.
- */
- def sourceSeqStartsWith(token: String): Set[SynchronousGrammar.Rule] = {
- this.rulePrefixMap.getAllWithPrefix(Seq(token))
- }
-
- /**
- * Gets all symbols used in the grammar.
- *
- * This includes both terminal and non-terminal symbols
- *
- * @return All symbols in the grammar.
- */
- def sourceSymbols(): Set[String] = {
- this.rules.flatMap(_.sourceSeq).toSet
- }
-}
-
-object SynchronousGrammar {
-
- /**
- * Determines whether a token is a terminal or non-terminal.
- *
- * Currently, tokens must start with "[" and end with "]" to be a non-terminal.
- *
- * @param token A token from a grammar.
- * @return True if the token is a terminal, false otherwise.
- */
- def isTerminal(token: String): Boolean = !token.matches("^\\[.*\\]$")
-
- /**
- * Strips any sub-type information from a non-terminal symbol.
- *
- * For example, `[Int:4Digit]` would be converted to `[Int]`
- *
- * @param token A non-terminal token from the grammar.
- * @return A non-terminal token without the sub-type information.
- */
- def basicSymbol(token: String): String = token.replaceAll(":[^\\]]*", "")
-
- /**
- * Determines whether a token is a number or not.
- *
- * Currently, only tokens that are all digits are considered to be numbers.
- *
- * @param token A token from a grammar.
- * @return True if the token is a number, false otherwise.
- */
- def isNumber(token: String): Boolean = token.matches("^\\d+$")
-
- /**
- * Determines whether a token is a nil non-terminal symbol or not.
- *
- * Currently, only non-terminals whose basic symbol is "[Nil]" are considered to be nils.
- *
- * @param token A token from a grammar.
- * @return True if the token is a nil non-terminal, false otherwise.
- */
- def isNil(token: String): Boolean = this.basicSymbol(token) == "[Nil]"
-
- /**
- * Parses a [[SynchronousGrammar]] from a string representation.
- *
- * The first line defines the one or more root symbols and looks like:
- *
- * ROOTS [Period] [TimeSpan] ...
- *
- * The remaining lines follow the format of Joshua/Heiro
- * (http://joshua-decoder.org/4.0/file-formats.html) and look like:
- *
- * [Period] ||| [Period,1] and [Period,2] ||| Sum [Period,1] [Period,2] ||| 1.0
- * ...
- *
- *
- * @param text The formatted grammar string.
- * @return A new [[SynchronousGrammar]].
- */
- def fromString(text: String): SynchronousGrammar = {
- val stripLabel: (String => String) = _.replaceAll("\\[(.*),.*\\]", "[$1]")
- // workaround instead of just `text.lines` because jdk11 also defines `String.lines`
- val lines = text.linesWithSeparators.map(_.stripLineEnd)
- val firstLine = lines.next
- if (!firstLine.startsWith("ROOTS")) {
- throw new IllegalArgumentException("First line must define root symbols, e.g. ROOTS [XXX] [YYY]")
- }
- val rootSymbols = firstLine.split("\\s+").tail.toSet
- val nonCommentLines = lines.filterNot(_.startsWith("//"))
- val rules = for (line <- nonCommentLines) yield line.trim.split("\\s*[|][|][|]\\s*") match {
- case Array(symbol, sourceSeqString, targetSeqString, scoreString) => {
- val sourceSeqItems = sourceSeqString.split("\\s+").toIndexedSeq
- val targetSeqItems = targetSeqString.split("\\s+").toIndexedSeq
- val sourceNonTerminals = sourceSeqItems.filterNot(this.isTerminal)
- val targetNonTerminals = targetSeqItems.filterNot(this.isTerminal)
- val alignment = for ((token, targetIndex) <- targetNonTerminals.zipWithIndex) yield {
- if (sourceNonTerminals.count(_ == token) != 1) {
- val message = "Expected exactly 1 non-terminal matching \"%s\" in \"%s\""
- throw new IllegalArgumentException(message.format(token, sourceSeqString))
- }
- targetIndex -> sourceNonTerminals.indexOf(token)
- }
- Some(Rule(symbol, sourceSeqItems.map(stripLabel), targetSeqItems.map(stripLabel), alignment.toMap))
- }
- case Array("") => None
- case _ => throw new IllegalArgumentException("\"" + line + "\"")
- }
- new SynchronousGrammar(rootSymbols, rules.flatten.toList)
- }
-
- /**
- * A synchronous grammar rule.
- *
- * @constructor Creates a synchronous grammar rule.
- * @param symbol A non-terminal symbol
- * @param sourceSeq The sequence of source tokens
- * @param targetSeq The sequence of target tokens
- * @param nonTerminalAlignment A mapping from source non-terminal indexes to the corresponding
- * target non-terminal indexes. For example, Map(1->2, 2->1) would indicate
- * that the first source non-terminal is the second target non-terminal and vice versa.
- */
- case class Rule(symbol: String, sourceSeq: IndexedSeq[String], targetSeq: IndexedSeq[String], nonTerminalAlignment: Map[Int, Int]) {
-
- /**
- * Strips any sub-type information from this rule's symbol.
- *
- * See [[SynchronousGrammar.basicSymbol]].
- */
- val basicSymbol = SynchronousGrammar.basicSymbol(this.symbol)
-
- /**
- * Determines whether this rule's symbol is a nil non-terminal symbol or not.
- *
- * See [[SynchronousGrammar.isNil]].
- */
- val isNil = SynchronousGrammar.isNil(this.symbol)
- }
-}
-
-private[timenorm] class PrefixMultiMap[K, V] {
-
- var suffixes = Map.empty[K, PrefixMultiMap[K, V]]
- var values = Set.empty[V]
-
- def +=(key: Seq[K], value: V): Unit = {
- if (key.isEmpty) {
- this.values += value
- } else {
- val head = key.head
- if (!this.suffixes.contains(head)) {
- this.suffixes += head -> new PrefixMultiMap[K, V]
- }
- this.suffixes(head) += (key.tail, value)
- }
- }
-
- def get(key: Seq[K]): Set[V] = {
- this.getMap(key) match {
- case None => Set.empty
- case Some(map) => map.values
- }
- }
-
- def getAll: Set[V] = {
- this.values ++ this.suffixes.values.flatMap(_.getAll)
- }
-
- def getAllWithPrefix(key: Seq[K]): Set[V] = {
- this.getMap(key) match {
- case None => Set.empty
- case Some(map) => map.getAll
- }
- }
-
- private def getMap(key: Seq[K]): Option[PrefixMultiMap[K, V]] = {
- if (key.isEmpty) {
- Some(this)
- } else {
- this.suffixes.get(key.head).flatMap(_.getMap(key.tail))
- }
- }
-}
-
-private[timenorm] object PrefixMultiMap {
- def empty[K, V] = new PrefixMultiMap[K, V]
-}
diff --git a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/SynchronousParser.scala b/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/SynchronousParser.scala
deleted file mode 100755
index 7ea091b..0000000
--- a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/SynchronousParser.scala
+++ /dev/null
@@ -1,262 +0,0 @@
-package org.clulab.timenorm.scfg
-
-import scala.collection.immutable.IndexedSeq
-import scala.collection.mutable
-import scala.collection.mutable.ListBuffer
-import scala.util.{Try, control}
-
-/**
- * A parser for synchronous grammars.
- *
- * @constructor Create a new parser from a synchronous grammar.
- * @param grammar A synchronous grammar.
- */
-class SynchronousParser(grammar: SynchronousGrammar) {
-
- import SynchronousParser._
-
- /**
- * Parse the source tokens into a tree non-terminals and target tokens.
- *
- * @param sourceTokens The source tokens to be parsed.
- * @return The parsed tree of non-terminals and target tokens.
- */
- def parseAll(sourceTokens: Array[String]): Array[Tree.NonTerminal] = {
- this.parseAll(sourceTokens.toIndexedSeq).toArray
- }
-
- /**
- * Parse the source tokens into a tree non-terminals and target tokens.
- *
- * @param sourceTokens The source tokens to be parsed.
- * @return The parsed tree of non-terminals and target tokens.
- */
- def parseAll(sourceTokens: IndexedSeq[String]): IndexedSeq[Tree.NonTerminal] = {
- if (sourceTokens.isEmpty) {
- throw new UnsupportedOperationException("Cannot parse empty token sequence")
- }
- val chart = this.parseChart(sourceTokens)
- val completes = chart(sourceTokens.size)(0).completes
- val roots = completes.filter(parse => this.grammar.rootSymbols.contains(parse.rule.symbol))
- val trees = roots.map(_.toTargetTree).toIndexedSeq
- if (trees.isEmpty) {
- val nTokens = sourceTokens.size
- val completes =
- for {
- size <- 1 to nTokens
- start <- 0 until (nTokens - size + 1)
- complete <- chart(size)(start).completes
- } yield {
- "%s(%s)".format(complete.rule.symbol, sourceTokens.slice(start, start + size).mkString(","))
- }
- val message = "Could not parse %s. Partial parses:\n%s"
- throw new UnsupportedOperationException(message.format(sourceTokens, completes.mkString("\n")))
- }
- trees
- }
-
- /**
- * Attempt to parse the source tokens into a tree of non-terminals and target tokens.
- *
- * @param sourceTokens The source tokens to be parsed.
- * @return Success(trees) if the source tokens could be parsed, Failure otherwise.
- */
- def tryParseAll(sourceTokens: IndexedSeq[String]): Try[IndexedSeq[Tree.NonTerminal]] = {
- control.Exception.catching(classOf[UnsupportedOperationException]).withTry(parseAll(sourceTokens))
- }
-
- private def parseChart(sourceTokens: IndexedSeq[String]): Array[Array[ChartEntry]] = {
- val nTokens = sourceTokens.size
- val chart = Array.tabulate(nTokens + 1, nTokens) {
- (size, start) => if (size == 0 || start + size > nTokens) null else ChartEntry()
- }
-
- // special handling of [Number]: pass through tokens that are numbers
- for (start <- 0 until nTokens) {
- val token = sourceTokens(start)
- if (SynchronousGrammar.isNumber(token)) {
- for (symbol <- grammar.sourceSymbolsForNumber(token.toInt)) {
- val rule = SynchronousGrammar.Rule(symbol, IndexedSeq(token), IndexedSeq(token), Map.empty)
- chart(1)(start).completes += Parse(rule, IndexedSeq.empty)
- }
- }
- }
-
- // fill rules that start with terminals
- for (start <- 0 until nTokens) {
- for (rule <- grammar.sourceSeqStartsWith(sourceTokens(start))) {
- val initialTerminals = rule.sourceSeq.takeWhile(SynchronousGrammar.isTerminal)
- val size = initialTerminals.size
- if (sourceTokens.slice(start, start + size) == initialTerminals) {
- val entry = chart(size)(start)
- if (rule.sourceSeq.size == size) {
- entry.completes += Parse(rule, IndexedSeq.empty)
- } else {
- entry.partials += PartialParse(rule, size, IndexedSeq.empty)
- }
- }
- }
- }
-
- // fill in the chart from the smallest sizes to the biggest sizes
- for (size <- 1 to nTokens; start <- 0 to (nTokens - size)) {
- val entry = chart(size)(start)
-
- // look for ways to create entries of size `size` from the current partial parses
- for (size1 <- 1 until size) {
- val start2 = start + size1
- val size2 = size - size1
- for (partial <- chart(size1)(start).partials) {
-
- // partials that can be advanced to `size` using terminals
- val newSourceSeqIndex = partial.sourceSeqIndex + size2
- val symbolSeq = partial.rule.sourceSeq.slice(partial.sourceSeqIndex, newSourceSeqIndex)
- val tokenSeq = sourceTokens.slice(start2, start2 + size2)
- if (symbolSeq.forall(SynchronousGrammar.isTerminal) && symbolSeq == tokenSeq) {
- if (partial.rule.sourceSeq.size == newSourceSeqIndex) {
- entry.completes += Parse(partial.rule, partial.nonTerminalRules)
- } else {
- entry.partials += PartialParse(partial.rule, newSourceSeqIndex, partial.nonTerminalRules)
- }
- }
-
- // partials that can be advanced to `size` using completed non-terminals
- for (complete <- chart(size2)(start2).completes) {
- if (partial.rule.sourceSeq(partial.sourceSeqIndex) == complete.rule.symbol) {
- val sourceSeqIndex = partial.sourceSeqIndex + 1
- val nonTerminalRules = partial.nonTerminalRules :+ complete
- if (partial.rule.sourceSeq.size == sourceSeqIndex) {
- entry.completes += Parse(partial.rule, nonTerminalRules)
- } else {
- entry.partials += PartialParse(partial.rule, sourceSeqIndex, nonTerminalRules)
- }
- }
- }
- }
-
- // expand complete parses if there are Nil parses beside them
- for (complete1 <- chart(size1)(start).completes) {
- for (complete2 <- chart(size2)(start2).completes) {
- if (!complete1.rule.isNil && complete2.rule.isNil) {
- entry.completes += complete1
- } else if (complete1.rule.isNil && !complete2.rule.isNil) {
- entry.completes += complete2
- }
- }
- }
- }
-
- // create parses for rules that start with any of the currently complete parses
- // NOTE: we have to use a queue here because the loop itself may add more completed
- // rules that we then also need to process
- val queue = mutable.Queue.empty ++ entry.completes
- while (queue.nonEmpty) {
- val complete = queue.dequeue
- for (rule <- grammar.sourceSeqStartsWith(complete.rule.symbol)) {
- if (rule.sourceSeq.tail.isEmpty) {
- val complete2 = Parse(rule, IndexedSeq(complete))
- queue.enqueue(complete2)
- entry.completes += complete2
- } else {
- entry.partials += PartialParse(rule, 1, IndexedSeq(complete))
- }
- }
- }
- }
- chart
- }
-}
-
-object SynchronousParser {
-
- /**
- * A tree of non-terminals and tokens.
- *
- * Used primarily by [[SynchronousParser]] to represent the tree of target non-terminals
- * and terminals that correspond to an input sequence of source terminals.
- */
- sealed trait Tree
-
- /**
- * Contains the different types of [[Tree]]s.
- */
- object Tree {
-
- /**
- * A tree representing a terminal token.
- *
- * Used primarily by [[SynchronousParser]] to represent target terminals.
- *
- * @constructor Creates a terminal tree from a token.
- * @param token A token.
- */
- case class Terminal(token: String) extends Tree
-
- /**
- * A tree representing a non-terminal.
- *
- * Used primarily by [[SynchronousParser]] to represent target non-terminals.
- *
- * @constructor Creates a non-terminal tree from a rule and a list of children.
- * @param rule A synchronous grammar rule.
- * @param children The trees that are children of this non-terminal.
- */
- case class NonTerminal(rule: SynchronousGrammar.Rule, children: List[Tree]) extends Tree
- }
-
- private[SynchronousParser] case class Parse(
- rule: SynchronousGrammar.Rule,
- nonTerminalRules: IndexedSeq[Parse]) {
-
- def toTargetTree: Tree.NonTerminal = {
- var nonTerminalIndex = -1
- val children = for ((token, i) <- this.rule.targetSeq.zipWithIndex) yield {
- if (SynchronousGrammar.isTerminal(token)) {
- Tree.Terminal(token)
- } else {
- nonTerminalIndex += 1
- val nonTerminalRulesIndex = this.rule.nonTerminalAlignment(nonTerminalIndex)
- this.nonTerminalRules(nonTerminalRulesIndex).toTargetTree
- }
- }
- val subtrees = this.insertSubtreesFromParentheses(children.iterator)
- Tree.NonTerminal(rule, subtrees)
- }
-
- private def insertSubtreesFromParentheses(trees: Iterator[Tree]): List[Tree] = {
- if (trees.isEmpty) {
- Nil
- } else {
- val tree = trees.next match {
- case Tree.Terminal("(") => this.parseSubtreeFollowingOpenParentheses(trees)
- case tree => tree
- }
- tree :: this.insertSubtreesFromParentheses(trees)
- }
- }
-
- private def parseSubtreeFollowingOpenParentheses(trees: Iterator[Tree]): Tree = {
- val Tree.Terminal(symbol) = trees.next
- val children = ListBuffer.empty[Tree]
- var getNext = true
- while (getNext) {
- trees.next match {
- case Tree.Terminal(")") => getNext = false
- case Tree.Terminal("(") => children += this.parseSubtreeFollowingOpenParentheses(trees)
- case tree => children += tree
- }
- }
- val rule = SynchronousGrammar.Rule("[" + symbol + "]", IndexedSeq.empty, IndexedSeq.empty, Map.empty)
- Tree.NonTerminal(rule, children.toList)
- }
- }
-
- private[SynchronousParser] case class PartialParse(
- rule: SynchronousGrammar.Rule,
- sourceSeqIndex: Int,
- nonTerminalRules: IndexedSeq[Parse])
-
- private[SynchronousParser] case class ChartEntry(
- completes: mutable.Set[Parse] = mutable.Set.empty,
- partials: mutable.Set[PartialParse] = mutable.Set.empty)
-}
diff --git a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/Temporal.scala b/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/Temporal.scala
deleted file mode 100755
index 5035aaf..0000000
--- a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/Temporal.scala
+++ /dev/null
@@ -1,719 +0,0 @@
-package org.clulab.timenorm.scfg
-
-import java.time.temporal.ChronoField._
-import java.time.temporal.ChronoUnit._
-import java.time.temporal.IsoFields._
-import java.time.temporal.{TemporalField, TemporalUnit}
-import java.time.{LocalDateTime, ZoneId, ZonedDateTime}
-
-import org.clulab.time._
-
-import scala.collection.immutable.ListMap
-
-/**
- * A temporal object, such as a time span or a period.
- */
-sealed trait Temporal {
-
- /**
- * The temporal object, formatted as a
- * [[http://timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3 TIMEX3 value attribute]].
- */
- val timeMLValue: String
-}
-
-/**
- * An unanchored period of time such as "three weeks" or "a year".
- *
- * @constructor Creates a period from units and their amounts.
- * @param unitAmounts A mapping of temporal units to their associated amounts, e.g. `Map(WEEKS->3)`.
- * @param modifier A modifier (for periods that are not exact).
- */
-case class Period(
- unitAmounts: Map[TemporalUnit, Int],
- modifier: Modifier = Modifier.Exact) extends Temporal {
-
- private val simplifyUnitMap = ListMap[TemporalUnit, Seq[(TemporalUnit, Int)]](
- DECADES -> Seq((YEARS, 10)),
- CENTURIES -> Seq((DECADES, 10), (YEARS, 100)))
-
- private val unitChars = ListMap[TemporalUnit, String](
- UNSPECIFIED -> "X",
- CENTURIES -> "CE",
- DECADES -> "DE",
- YEARS -> "Y",
- QUARTER_YEARS -> "Q",
- SPRINGS -> "SP",
- SUMMERS -> "SU",
- FALLS -> "FA",
- WINTERS -> "WI",
- MONTHS -> "M",
- WEEKS -> "W",
- WEEKENDS -> "WE",
- MORNINGS -> "MO",
- AFTERNOONS -> "AF",
- EVENINGS -> "EV",
- NIGHTS -> "NI",
- DAYS -> "D",
- HOURS -> "H",
- MINUTES -> "M",
- SECONDS -> "S")
-
- val timeMLValue: String = {
- val simpleUnitAmounts = this.simplifyUnitMap.foldLeft(this.unitAmounts) {
- case (counts, (unit, unitMultipliers)) => counts.get(unit) match {
- case None => counts
- case Some(Int.MaxValue) => counts
- case Some(value) => unitMultipliers.find(um => counts.contains(um._1)) match {
- case None => counts
- case Some((newUnit, multiplier)) =>
- counts - unit + (newUnit -> (counts(newUnit) + value * multiplier))
- }
- }
- }
- val units = simpleUnitAmounts.keySet.toSeq.sortBy(_.getDuration).reverse
- val parts = for (unit <- units; if simpleUnitAmounts(unit) != 0) yield {
- val amount = simpleUnitAmounts(unit) match {
- case Int.MaxValue => "X"
- case i => i.toString
- }
- val suffix = this.unitChars.get(unit) match {
- case None => throw new UnsupportedOperationException("Don't know how to format " + unit)
- case Some(string) => string
- }
- (unit, amount + suffix)
- }
- val (dateParts, timeParts) = parts.partition(_._1.getDuration.compareTo(HOURS.getDuration) > 0)
- val timeString = if (timeParts.isEmpty) "" else "T" + timeParts.map(_._2).mkString
- "P" + dateParts.map(_._2).mkString + timeString
- }
-
- /**
- * Adds this period to another period.
- */
- def +(that: Period): Period = Period(
- this.mapOverUnion(that, _ + _).toMap,
- this.modifier & that.modifier)
-
- /**
- * Subtracts a period from this period.
- */
- def -(that: Period): Period = Period(
- this.mapOverUnion(that, _ - _).toMap,
- this.modifier & that.modifier)
-
- /**
- * Determines whether this period is longer than the given unit.
- */
- def >(unit: TemporalUnit): Boolean = {
- if (this.unitAmounts.isEmpty) {
- false
- } else {
- val maxUnit = this.unitAmounts.keySet.maxBy(_.getDuration)
- maxUnit.getDuration.compareTo(unit.getDuration) > 0 ||
- (maxUnit == unit && this.unitAmounts(maxUnit) > 1)
- }
- }
-
- /**
- * Moves a time later by the amount of this period.
- */
- def addTo(time: ZonedDateTime): ZonedDateTime = this.unitAmounts.foldLeft(time) {
- case (time, (unit, value)) => time.plus(value, unit)
- }
-
- /**
- * Moves a time earlier by the amount of this period.
- */
- def subtractFrom(time: ZonedDateTime): ZonedDateTime = this.unitAmounts.foldLeft(time) {
- case (time, (unit, value)) => time.minus(value, unit)
- }
-
- private def mapOverUnion(that: Period, op: (Int, Int) => Int): Iterable[(TemporalUnit, Int)] = {
- for (unit <- this.unitAmounts.keySet ++ that.unitAmounts.keySet)
- yield (unit, op(this.unitAmounts.getOrElse(unit, 0), that.unitAmounts.getOrElse(unit, 0)))
- }
-}
-
-/**
- * Factory for creating [[Period]] instances.
- */
-object Period {
-
- /**
- * The period containing nothing.
- */
- final val empty = Period(Map.empty)
-
- /**
- * The period representing an unspecified amount of time.
- */
- final val unspecified = Period(Map(UNSPECIFIED -> Int.MaxValue))
-
- /**
- * Creates a period from a unit and a fractional amount.
- *
- * Only certain types of units that can naturally be divided into smaller units (e.g. 1 year is
- * 12 months) are supported.
- *
- * @param numerator The numerator of the fractional amount.
- * @param denominator The denominator of the fractional amount.
- * @param unit The temporal unit for the period.
- * @param modifier The modifier for the period.
- * @return A period representing the fractional amount
- */
- def fromFractional(numerator: Int, denominator: Int, unit: TemporalUnit, modifier: Modifier = Modifier.Exact): Period = {
- var map = Map(unit -> (numerator / denominator))
- var currRemainder = numerator % denominator
- var currUnit = unit
- while (currRemainder != 0) {
- this.smallerUnit.get(currUnit) match {
- case None => throw new UnsupportedOperationException("Don't know how to split " + currUnit)
- case Some((multiplier, nextUnit)) =>
- val numerator = currRemainder * multiplier
- map += nextUnit -> (numerator / denominator)
- currUnit = nextUnit
- currRemainder = numerator % denominator
- }
- }
- Period(map, modifier)
- }
-
- private final val smallerUnit = Map[TemporalUnit, (Int, TemporalUnit)](
- YEARS -> (12, MONTHS),
- MONTHS -> (30, DAYS),
- WEEKS -> (7, DAYS),
- DAYS -> (24, HOURS),
- HOURS -> (60, MINUTES),
- MINUTES -> (60, SECONDS))
-}
-
-/**
- * An unanchored set of periods, such as "daily" or "two days a week".
- *
- * @constructor Creates a period set from a period and various attributes.
- * @param period The base period of the period set.
- * @param modifier A modifier if the period set is not exact.
- * @param quantifier A quantifier (to capture expressions like the "every" in "every day").
- * @param frequency A frequency (to capture expressions like "two days" in "two days a week").
- */
-case class PeriodSet(
- period: Period,
- modifier: Modifier = Modifier.Exact,
- quantifier: PeriodSet.Quantifier = PeriodSet.Quantifier.None,
- frequency: PeriodSet.Frequency = PeriodSet.Frequency(1)) extends Temporal {
- val timeMLValue = this.period.timeMLValue
-}
-
-/**
- * Provider of different [[PeriodSet]] attributes.
- */
-object PeriodSet {
-
- /**
- * A frequency like "five times", or "two days" in "two days a week".
- *
- * @constructor Creates a frequency from a number and an optional unit.
- * @param times The numeric frequency.
- * @param unit An optional unit for the frequency.
- */
- case class Frequency(val times: Int, val unit: Option[TemporalUnit] = None) {
-
- /**
- * Unifies this frequency with another frequency.
- *
- * Currently only allows identical frequencies to be unified.
- */
- def &(that: Frequency): Frequency = {
- if (this == that && this.unit.isEmpty) {
- this
- } else {
- throw new IllegalArgumentException(
- "cannot combine %s and %s".format(this, that))
- }
- }
- }
-
- /**
- * A quantifier like "every" or "each".
- *
- * @constructor Creates a quantifier from a string
- * @param timeMLValue A
- * [[http://timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3 TIMEX3 quant attribute]]
- * string.
- */
- abstract class Quantifier(val timeMLValue: Option[String]) {
-
- /**
- * Unifies this quantifier with another quantifier.
- *
- * Currently only allows identical quantifiers to be unified.
- */
- def &(that: Quantifier): Quantifier = {
- if (this == that && this.timeMLValue.isEmpty) {
- this
- } else {
- throw new IllegalArgumentException(
- "cannot combine %s and %s".format(this, that))
- }
- }
- }
-
- /**
- * Provider of different of [[Quantifier]] instances.
- */
- object Quantifier {
-
- /**
- * The absence of a quantifier
- */
- case object None extends Quantifier(scala.None)
-
- /**
- * A quantifier for the meaning of "every"
- */
- case object Every extends Quantifier(Some("EVERY"))
-
- /**
- * A quantifier for the meaning of "each"
- */
- case object Each extends Quantifier(Some("EACH"))
-
- private val values = Seq[Quantifier](None, Every, Each)
-
- private val stringToQuantifier =
- (for (quantifier <- values; name <- quantifier.timeMLValue) yield name -> quantifier).toMap
-
- /**
- * Gets the quantifier corresponding to a string value.
- *
- * @param timeMLValue A
- * [[http://timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3 TIMEX3 quant attribute]]
- * string.
- * @return The quantifier for the given string.
- */
- def valueOf(timeMLValue: String): Quantifier = this.stringToQuantifier(timeMLValue)
- }
-}
-
-/**
- * An anchored span of time, such as "2011" or "April 25, 1980".
- *
- * @constructor Creates a new time span from start and end points.
- * @param start The start point of the time span. The span is assumed to include this point.
- * @param end The end point of the time span. The span is assumed to end right before this point.
- * @param period The period between the start and end points. This could be derived from the start
- * and end, but then there would be no way to store differentiate between two
- * representations of the same period (e.g. 7 days vs. 1 week), which matters for generating
- * TimeML values.
- * @param modifier A modifier if the time span is not exact.
- */
-case class TimeSpan(
- start: ZonedDateTime,
- end: ZonedDateTime,
- period: Period,
- modifier: Modifier) extends Temporal {
-
- /**
- * The time span, formatted as a
- * [[http://timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3 TIMEX3 value attribute]]
- * if possible.
- *
- * Time spans that are impossible to represent in a TIMEX3 value (e.g. "the last three weeks")
- * will produce `None`.
- */
- def timeMLValueOption: Option[String] = {
- if (this.start == this.end) {
- Some(this.start.toLocalDateTime.toString)
- } else {
- this.period.unitAmounts.toList match {
- case List((unit, 1)) if TimeSpan.truncate(this.start, unit) == this.start =>
- val fields = TimeSpan.unitToFieldsToDisplay.get(unit) match {
- case None => throw new UnsupportedOperationException("Don't know how to display " + unit)
- case Some(fields) => fields
- }
- val parts = for (field <- fields) yield {
- TimeSpan.fieldFormats.get(field) match {
- case None => throw new UnsupportedOperationException("Don't know how to format " + field)
- case Some(format) => format(this.start.get(field))
- }
- }
- Some(parts.mkString)
- case _ => None
- }
- }
- }
-
- /**
- * The time span, formatted as a
- * [[http://timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3 TIMEX3 value attribute]]
- * if possible.
- *
- * Time spans that are impossible to represent in a TIMEX3 value (e.g. "the last three weeks")
- * will produce the TIMEX3 value for their period (e.g. "three weeks").
- */
- val timeMLValue = this.timeMLValueOption.getOrElse(this.period.timeMLValue)
-}
-
-/**
- * Factory for [[TimeSpan]] instances.
- */
-object TimeSpan {
-
- /**
- * The time point representing an unspecified start point.
- */
- final val unspecifiedStart = ZonedDateTime.of(LocalDateTime.MIN, ZoneId.of("Z"))
-
- /**
- * The time point representing an unspecified end point.
- */
- final val unspecifiedEnd = ZonedDateTime.of(LocalDateTime.MAX, ZoneId.of("Z"))
-
- /**
- * Creates a [[TimeSpan]] from a single day.
- *
- * @param year The time span's year.
- * @param month The time span's month.
- * @param day The time span's day.
- * @return A time span lasting for exactly the given day.
- */
- def of(year: Int, month: Int, day: Int): TimeSpan = {
- val start = ZonedDateTime.of(LocalDateTime.of(year, month, day, 0, 0), ZoneId.of("Z"))
- this.startingAt(start, Period(Map(DAYS -> 1)), Modifier.Exact)
- }
-
- /**
- * Creates a [[TimeSpan]] from a single second.
- *
- * @param year The time span's year.
- * @param month The time span's month.
- * @param day The time span's day.
- * @param hour The time span's hour.
- * @param minute The time span's minute.
- * @param second The time span's second.
- * @return A time span lasting for exactly the given second.
- */
- def of(year: Int, month: Int, day: Int, hour: Int, minute: Int, second: Int): TimeSpan = {
- val localStart = LocalDateTime.of(year, month, day, hour, minute, second)
- val start = ZonedDateTime.of(localStart, ZoneId.of("Z"))
- this.startingAt(start, Period(Map(SECONDS -> 1)), Modifier.Exact)
- }
-
- /**
- * Creates a [[TimeSpan]] from a TimeML value string.
- *
- * @param value A [[http://timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3 TIMEX3 value attribute]]
- * string. The value must be a date/time, not a period/duration.
- * @return The time span corresponding to the TimeML value.
- */
- def fromTimeMLValue(value: String): TimeSpan = {
- val fieldValues: Map[TemporalField, Int] = value.split("[-T:]") match {
- case Array(centuryOrDecadeOrYear) => centuryOrDecadeOrYear.length match {
- case 2 => Map(CENTURY -> centuryOrDecadeOrYear.toInt)
- case 3 => Map(DECADE -> centuryOrDecadeOrYear.toInt)
- case 4 => Map(YEAR -> centuryOrDecadeOrYear.toInt)
- }
- case Array(year, seasonOrQuarterOrMonthOrWeek) => Map[TemporalField, Int](YEAR -> year.toInt) ++ {
- seasonOrQuarterOrMonthOrWeek match {
- case "SP" => Map(SPRING_OF_YEAR -> 1)
- case "SU" => Map(SUMMER_OF_YEAR -> 1)
- case "FA" => Map(FALL_OF_YEAR -> 1)
- case "WI" => Map(WINTER_OF_YEAR -> 1)
- case _ => seasonOrQuarterOrMonthOrWeek.head match {
- case 'W' => Map(ISO_WEEK.OF_YEAR -> seasonOrQuarterOrMonthOrWeek.tail.toInt)
- case 'Q' => Map(QUARTER_OF_YEAR -> seasonOrQuarterOrMonthOrWeek.tail.toInt)
- case _ => Map(MONTH_OF_YEAR -> seasonOrQuarterOrMonthOrWeek.toInt)
- }
- }
- }
- case Array(year, monthOrWeek, dayOrWeekend) => Map[TemporalField, Int](YEAR -> year.toInt) ++ {
- monthOrWeek.head match {
- case 'W' => dayOrWeekend match {
- case "WE" => Map(ISO_WEEK.OF_YEAR -> monthOrWeek.tail.toInt, WEEKEND_OF_WEEK -> 1)
- }
- case _ => Map(MONTH_OF_YEAR -> monthOrWeek.toInt, DAY_OF_MONTH -> dayOrWeekend.toInt)
- }
- }
- case Array(year, month, day, hourOrPartOfDay) =>
- Map[TemporalField, Int](YEAR -> year.toInt, MONTH_OF_YEAR -> month.toInt, DAY_OF_MONTH -> day.toInt) ++ {
- hourOrPartOfDay match {
- case "MO" => Map(MORNING_OF_DAY -> 1)
- case "AF" => Map(AFTERNOON_OF_DAY -> 1)
- case "EV" => Map(EVENING_OF_DAY -> 1)
- case "NI" => Map(NIGHT_OF_DAY -> 1)
- case hour => Map(HOUR_OF_DAY -> hour.toInt)
- }
- }
- case Array(year, month, day, hour, minute) =>
- Map(YEAR -> year.toInt, MONTH_OF_YEAR -> month.toInt, DAY_OF_MONTH -> day.toInt,
- HOUR_OF_DAY -> hour.toInt, MINUTE_OF_HOUR -> minute.toInt)
- case Array(year, month, day, hour, minute, second) =>
- Map(YEAR -> year.toInt, MONTH_OF_YEAR -> month.toInt, DAY_OF_MONTH -> day.toInt,
- HOUR_OF_DAY -> hour.toInt, MINUTE_OF_HOUR -> minute.toInt, SECOND_OF_MINUTE -> second.toInt)
- case _ => throw new Exception("%s %s".format(value, value.split("[-T]").toList))
- }
-
- // set all the requested fields
- val zero = ZonedDateTime.of(LocalDateTime.of(1, 1, 1, 0, 0), ZoneId.of("Z"))
- val nonTruncatedStart = fieldValues.foldLeft(zero) {
- case (dateTime, (field, value)) => dateTime.`with`(field, value)
- }
-
- // truncate the date-time based on the smallest field's base unit
- val minField = fieldValues.keySet.minBy(_.getBaseUnit().getDuration())
- val minUnit = minField.getBaseUnit()
- val start = this.truncate(nonTruncatedStart, minUnit)
-
- // for things that overlap the boundary (e.g. NIGHT_OF_DAY) truncation will move them to
- // the previous range (e.g. the previous day) so we'll need to move them back
- val isNotTooEarly = fieldValues.forall { case (field, value) => start.get(field) == value }
- val adjustedStart = if (isNotTooEarly) start else start.plus(1, minField.getRangeUnit())
-
- // create a time span of exactly one unit in size
- this.startingAt(adjustedStart, Period(Map(minUnit -> 1)), Modifier.Exact)
- }
-
- /**
- * Creates a [[TimeSpan]] from a start point and a period.
- *
- * @param start The start point of the time span.
- * @param period The duration of the time span.
- * @param modifier The modifier of the time span, or [[Modifier.Exact]] if the span is exact.
- * @return A time span starting at the given point and lasting for the given period.
- */
- def startingAt(start: ZonedDateTime, period: Period, modifier: Modifier): TimeSpan = {
- TimeSpan(start, period.addTo(start), period, modifier)
- }
-
- /**
- * Creates a [[TimeSpan]] from an end point and a period.
- *
- * @param end The end point of the time span.
- * @param period The duration of the time span.
- * @param modifier The modifier of the time span, or [[Modifier.Exact]] if the span is exact.
- * @return A time span ending at the given point and lasting for the given period.
- */
- def endingAt(end: ZonedDateTime, period: Period, modifier: Modifier): TimeSpan = {
- TimeSpan(period.subtractFrom(end), end, period, modifier)
- }
-
- private[timenorm] def truncate(time: ZonedDateTime, unit: TemporalUnit): ZonedDateTime = {
- this.unitToFieldsToTruncate.get(unit) match {
- case None => throw new UnsupportedOperationException("Don't know how to truncate " + unit)
- case Some(fields) => fields.foldLeft(time) {
- case (time, field) => {
- val nUnits = time.get(field) - field.range.getMinimum
- time.minus(nUnits, field.getBaseUnit)
- }
- }
- }
- }
-
- private[timenorm] val fieldFormats = Map[TemporalField, Int => String](
- (CENTURY, "%02d".format(_)),
- (DECADE, "%03d".format(_)),
- (YEAR, "%04d".format(_)),
- (QUARTER_OF_YEAR, "-Q%d".format(_)),
- (SPRING_OF_YEAR, _ match { case 1 => "-SP" }),
- (SUMMER_OF_YEAR, _ match { case 1 => "-SU" }),
- (FALL_OF_YEAR, _ match { case 1 => "-FA" }),
- (WINTER_OF_YEAR, _ match { case 1 => "-WI" }),
- (MONTH_OF_YEAR, "-%02d".format(_)),
- (DAY_OF_MONTH, "-%02d".format(_)),
- (ISO_WEEK.OF_YEAR, "-W%02d".format(_)),
- (DAY_OF_WEEK, "-%d".format(_)),
- (WEEKEND_OF_WEEK, _ match { case 1 => "-WE" }),
- (MORNING_OF_DAY, _ match { case 1 => "TMO" }),
- (AFTERNOON_OF_DAY, _ match { case 1 => "TAF" }),
- (EVENING_OF_DAY, _ match { case 1 => "TEV" }),
- (NIGHT_OF_DAY, _ match { case 1 => "TNI" }),
- (HOUR_OF_DAY, "T%02d".format(_)),
- (MINUTE_OF_HOUR, ":%02d".format(_)),
- (SECOND_OF_MINUTE, ":%02d".format(_)))
-
- private val unitToFieldsToDisplay = Map[TemporalUnit, Seq[TemporalField]](
- CENTURIES -> Seq(CENTURY),
- DECADES -> Seq(DECADE),
- YEARS -> Seq(YEAR),
- QUARTER_YEARS -> Seq(YEAR, QUARTER_OF_YEAR),
- SPRINGS -> Seq(YEAR, SPRING_OF_YEAR),
- SUMMERS -> Seq(YEAR, SUMMER_OF_YEAR),
- FALLS -> Seq(YEAR, FALL_OF_YEAR),
- WINTERS -> Seq(YEAR, WINTER_OF_YEAR),
- MONTHS -> Seq(YEAR, MONTH_OF_YEAR),
- WEEKS -> Seq(YEAR, ISO_WEEK.OF_YEAR),
- WEEKENDS -> Seq(YEAR, ISO_WEEK.OF_YEAR, WEEKEND_OF_WEEK),
- DAYS -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH),
- MORNINGS -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, MORNING_OF_DAY),
- AFTERNOONS -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, AFTERNOON_OF_DAY),
- EVENINGS -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, EVENING_OF_DAY),
- NIGHTS -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, NIGHT_OF_DAY),
- HOURS -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY),
- MINUTES -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR),
- SECONDS -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE))
-
- private val unitToFieldsToTruncate = Map[TemporalUnit, Seq[TemporalField]](
- CENTURIES -> Seq(YEAR_OF_CENTURY, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- DECADES -> Seq(YEAR_OF_DECADE, MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- YEARS -> Seq(MONTH_OF_YEAR, DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- QUARTER_YEARS -> Seq(DAY_OF_QUARTER, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- SPRINGS -> Seq(DAY_OF_SPRING, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- SUMMERS -> Seq(DAY_OF_SUMMER, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- FALLS -> Seq(DAY_OF_FALL, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- WINTERS -> Seq(DAY_OF_WINTER, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- MONTHS -> Seq(DAY_OF_MONTH, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- WEEKS -> Seq(DAY_OF_WEEK, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- WEEKENDS -> Seq(DAY_OF_WEEKEND, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- DAYS -> Seq(HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- MORNINGS -> Seq(HOUR_OF_MORNING, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- AFTERNOONS -> Seq(HOUR_OF_AFTERNOON, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- EVENINGS -> Seq(HOUR_OF_EVENING, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- NIGHTS -> Seq(HOUR_OF_NIGHT, MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- HOURS -> Seq(MINUTE_OF_HOUR, SECOND_OF_MINUTE),
- MINUTES -> Seq(SECOND_OF_MINUTE),
- SECONDS -> Seq())
-}
-
-/**
- * A set of time spans, such as "Mondays" or "October each year".
- *
- * @constructor Creates a new time span set from a mapping of field values.
- * @param fields A mapping from temporal fields to field values. All known field values should be
- * provided here; temporal fields with unknown values should be absent from the map. For
- * example, `Map(DAY_OF_WEEK -> 1)` would represent "Mondays".
- */
-case class TimeSpanSet(fields: Map[TemporalField, Int]) extends Temporal {
- val timeMLValue: String = {
- val (timeFields, dayFields) = fields.keySet.partition(_.getBaseUnit().getDuration().compareTo(DAYS.getDuration()) < 0)
- val minDayField =
- if (dayFields.isEmpty) DAY_OF_MONTH
- else dayFields.minBy(_.getBaseUnit().getDuration())
- val dayFieldsToDisplay = TimeSpanSet.fieldToDayFieldsToDisplay(minDayField)
- val timeFieldsToDisplay =
- if (timeFields.isEmpty) Seq.empty[TemporalField]
- else TimeSpanSet.fieldToTimeFieldsToDisplay(timeFields.minBy(_.getBaseUnit().getDuration()))
- val fieldsToDisplay = dayFieldsToDisplay ++ timeFieldsToDisplay
- val parts =
- for (field <- fieldsToDisplay) yield fields.get(field) match {
- case Some(value) => TimeSpan.fieldFormats(field)(value)
- case None => TimeSpanSet.unspecifiedFieldFormats(field)
- }
- parts.mkString
- }
-}
-
-
-private object TimeSpanSet {
-
- private val fieldToDayFieldsToDisplay = Map[TemporalField, Seq[TemporalField]](
- CENTURY -> Seq(CENTURY),
- DECADE -> Seq(DECADE),
- YEAR -> Seq(YEAR),
- QUARTER_OF_YEAR -> Seq(YEAR, QUARTER_OF_YEAR),
- SPRING_OF_YEAR -> Seq(YEAR, SPRING_OF_YEAR),
- SUMMER_OF_YEAR -> Seq(YEAR, SUMMER_OF_YEAR),
- FALL_OF_YEAR -> Seq(YEAR, FALL_OF_YEAR),
- WINTER_OF_YEAR -> Seq(YEAR, WINTER_OF_YEAR),
- MONTH_OF_YEAR -> Seq(YEAR, MONTH_OF_YEAR),
- ISO_WEEK.OF_YEAR -> Seq(YEAR, ISO_WEEK.OF_YEAR),
- WEEKEND_OF_WEEK -> Seq(YEAR, ISO_WEEK.OF_YEAR, WEEKEND_OF_WEEK),
- DAY_OF_WEEK -> Seq(YEAR, ISO_WEEK.OF_YEAR, DAY_OF_WEEK),
- DAY_OF_MONTH -> Seq(YEAR, MONTH_OF_YEAR, DAY_OF_MONTH))
-
- private val fieldToTimeFieldsToDisplay = Map[TemporalField, Seq[TemporalField]](
- MORNING_OF_DAY -> Seq(MORNING_OF_DAY),
- AFTERNOON_OF_DAY -> Seq(AFTERNOON_OF_DAY),
- EVENING_OF_DAY -> Seq(EVENING_OF_DAY),
- NIGHT_OF_DAY -> Seq(NIGHT_OF_DAY),
- HOUR_OF_DAY -> Seq(HOUR_OF_DAY),
- MINUTE_OF_HOUR -> Seq(HOUR_OF_DAY, MINUTE_OF_HOUR),
- SECOND_OF_MINUTE -> Seq(HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE))
-
- private val unspecifiedFieldFormats = Map[TemporalField, String](
- (CENTURY, "XX"),
- (DECADE, "XXX"),
- (YEAR, "XXXX"),
- (MONTH_OF_YEAR, "-XX"),
- (DAY_OF_MONTH, "-XX"),
- (ISO_WEEK.OF_YEAR, "-WXX"),
- (HOUR_OF_DAY, "TXX"),
- (MINUTE_OF_HOUR, ":XX"),
- (SECOND_OF_MINUTE, ":XX"))
-}
-
-/**
- * A temporal modifier, such as "approximately" or "less than".
- *
- * Modifier types are provided by the [[Modifier$ Modifier]] companion object.
- *
- * @constructor Creates a new temporal modifier.
- * @param timeMLValueOption The string value of the modifier or None for no modifier.
- */
-abstract class Modifier(val timeMLValueOption: Option[String]) {
-
- /**
- * Unifies this modifier with another modifier.
- *
- * Currently only allows modifiers to be unified with [[Modifier.Exact]].
- */
- def &(that: Modifier): Modifier = {
- if (this == that) {
- this
- } else if (this == Modifier.Exact) {
- that
- } else if (that == Modifier.Exact) {
- this
- } else {
- throw new IllegalArgumentException(
- "cannot combine %s and %s".format(this, that))
- }
- }
-}
-
-/**
- * Provides [[Modifier]] instances.
- */
-object Modifier {
- case object Exact extends Modifier(None)
- case object Before extends Modifier(Some("BEFORE"))
- case object After extends Modifier(Some("AFTER"))
- case object OnOrBefore extends Modifier(Some("ON_OR_BEFORE"))
- case object OnOrAfter extends Modifier(Some("ON_OR_AFTER"))
- case object LessThan extends Modifier(Some("LESS_THAN"))
- case object MoreThan extends Modifier(Some("MORE_THAN"))
- case object EqualOrLess extends Modifier(Some("EQUAL_OR_LESS"))
- case object EqualOrMore extends Modifier(Some("EQUAL_OR_MORE"))
- case object Start extends Modifier(Some("START"))
- case object Mid extends Modifier(Some("MID"))
- case object End extends Modifier(Some("END"))
- case object Approx extends Modifier(Some("APPROX"))
-
- private val values = Seq[Modifier](
- Exact,
- Before,
- After,
- OnOrBefore,
- OnOrAfter,
- LessThan,
- MoreThan,
- EqualOrLess,
- EqualOrMore,
- Start,
- Mid,
- End,
- Approx)
-
- private val stringToModifier =
- (for (modifier <- values; value <- modifier.timeMLValueOption) yield value -> modifier).toMap
-
- /**
- * Gets the modifier corresponding to a string value.
- *
- * @param timeMLValue A
- * [[http://timeml.org/site/publications/timeMLdocs/timeml_1.2.1.html#timex3 TIMEX3 mod attribute]]
- * string.
- * @return The modifier for the given string.
- */
- def valueOf(timeMLValue: String): Modifier = this.stringToModifier(timeMLValue)
-}
diff --git a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/TemporalExpressionParser.scala b/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/TemporalExpressionParser.scala
deleted file mode 100755
index a88eb19..0000000
--- a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/TemporalExpressionParser.scala
+++ /dev/null
@@ -1,590 +0,0 @@
-package org.clulab.timenorm.scfg
-
-import java.io.File
-import java.net.URL
-import java.text.Normalizer
-import java.time.temporal.IsoFields.QUARTER_YEARS
-import java.time.{DateTimeException, LocalDate}
-import java.util.logging.Logger
-
-import scala.collection.immutable.IndexedSeq
-import scala.io.Source
-import scala.util.{Failure, Success, Try}
-
-object TemporalExpressionParser {
-
- /** The built-in English time parser.
- */
- def en(): TemporalExpressionParser = new TemporalExpressionParser(
- grammarURL = this.getClass.getResource("/org/clulab/timenorm/en.grammar"),
- tokenize = DefaultTokenizer
- )
-
- /** The built-in Italian time parser.
- */
- def it(): TemporalExpressionParser = new TemporalExpressionParser(
- grammarURL = this.getClass.getResource("/org/clulab/timenorm/it.grammar"),
- tokenize = ItalianTokenizer
- )
-
- /** The built-in Spanish time parser.
- */
- def es(): TemporalExpressionParser = new TemporalExpressionParser(
- grammarURL = this.getClass.getResource("/org/clulab/timenorm/es.grammar"),
- tokenize = DefaultTokenizer
- )
-
- /** Runs a demo of TemporalExpressionParser that reads time expressions from
- * standard input and writes their normalized forms to standard output.
- *
- * Note: This is only provided for demonstrative purposes.
- */
- def main(args: Array[String]): Unit = {
-
- // create the parser, using a grammar file if specified
- val parser = args match {
- case Array() =>
- new TemporalExpressionParser
- case Array(grammarPath) =>
- new TemporalExpressionParser(new File(grammarPath).toURI.toURL)
- case _ =>
- System.err.printf(
- "usage: %s [grammar-file]",
- this.getClass.getSimpleName
- )
- System.exit(1)
- throw new IllegalArgumentException
- }
-
- // use the current date as an anchor
- val now = LocalDate.now()
- val anchor = TimeSpan.of(now.getYear, now.getMonthValue, now.getDayOfMonth)
- System.out.printf("Assuming anchor: %s\n", anchor.timeMLValue)
- System.out.println("Type in a time expression (or :quit to exit)")
-
- // repeatedly prompt for a time expression and then try to parse it
- System.out.print(">>> ")
- for (line <- Source.stdin.getLines.takeWhile(_ != ":quit")) {
- parser.parse(line, anchor) match {
- case Failure(exception) =>
- System.out.printf("Error: %s\n", exception.getMessage)
- case Success(temporal) =>
- System.out.println(temporal.timeMLValue)
- }
- System.out.print(">>> ")
- }
- }
-}
-
-/** A parser for natural language expressions of time, based on a synchronous
- * context free grammar. Typical usage:
- * {{{
- * // create a new parser (using the default English grammar)
- * val parser = new TemporalExpressionParser
- * // establish an anchor time
- * val anchor = TimeSpan.of(2013, 1, 4)
- * // parse an expression given an anchor time (assuming here that it succeeds)
- * val Success(temporal) = parser.parse("two weeks ago", anchor)
- * // get the TimeML value ("2012-W51") from the Temporal
- * val value = temporal.timeMLValue
- * }}}
- *
- * @constructor
- * Creates a parser from a URL to a grammar file.
- * @param grammarURL
- * The URL of a grammar file, in [[SynchronousGrammar.fromString]] format. If
- * not specified, the default English grammar on the classpath is used. Note
- * that if another grammar is specified, it may be necessary to override the
- * [[tokenize]] method.
- * @param tokenize
- * A function that splits a string into tokens. The default tokenizer is
- * appropriate for the default English grammar. Other languages may require
- * alternate tokenizers.
- */
-class TemporalExpressionParser(
- grammarURL: URL = classOf[TemporalExpressionParser].getResource(
- "/org/clulab/timenorm/en.grammar"
- ),
- tokenize: String => IndexedSeq[String] = DefaultTokenizer
-) {
- private val logger = Logger.getLogger(this.getClass.getName)
- private val grammarText = Source.fromURL(grammarURL, "UTF-8").mkString
- private val grammar = SynchronousGrammar.fromString(grammarText)
- private val sourceSymbols = grammar.sourceSymbols()
- private val parser = new SynchronousParser(grammar)
-
- /** Tries to parse a source string into a single [[Temporal]] object.
- *
- * @param sourceText
- * The input string in the source language.
- * @param anchor
- * The anchor time (required for resolving relative times like "today").
- * @return
- * The most likely [[Temporal]] parse according to the parser's heuristic.
- */
- def parse(sourceText: String, anchor: TimeSpan): Try[Temporal] = {
- this.parseAll(sourceText, anchor).map(_.head)
- }
-
- /** Try to parse a source string into possible [[Temporal]] objects.
- *
- * @param sourceText
- * The input string in the source language.
- * @param anchor
- * The anchor time (required for resolving relative times like "today").
- * @return
- * A sequence of [[Temporal]] objects representing the possible parses. The
- * sequence is sorted by a heuristic that tries to put the most promising
- * parses first.
- */
- def parseAll(sourceText: String, anchor: TimeSpan): Try[Seq[Temporal]] = {
- // tokenize the string, filtering out any tokens not in the grammar
- val tokens = this.tokenize(sourceText).filter { token =>
- this.sourceSymbols.contains(token) || SynchronousGrammar.isNumber(token)
- }
-
- // parse the tokens into TemporalParses, failing if there is a syntactic error
- val parsesTry =
- try {
- val trees = this.parser.parseAll(tokens)
- // two unique trees can generate the same TemporalParse, so remove duplicates
- Success(trees.map(TemporalParse).toSet)
- } catch {
- case e: UnsupportedOperationException => Failure(e)
- }
-
- // if there was no syntactic error, convert the TemporalParses to Temporals
- parsesTry match {
- case Failure(e) => Failure(e)
-
- case Success(parses) =>
- // assume that the grammar ambiguity for any expression is at most 2
- if (parses.size > 2) {
- val message =
- "Expected no more than 2 parses for \"%s\", found:\n %s"
- this.logger.warning(
- message.format(sourceText, parses.mkString("\n "))
- )
- }
-
- // try to convert each TemporalParse to a Temporal
- val temporalTries = for (parse <- parses) yield {
- try {
- Success(parse match {
- case parse: PeriodParse => parse.toPeriod
- case parse: PeriodSetParse => parse.toPeriodSet
- case parse: TimeSpanParse => parse.toTimeSpan(anchor)
- case parse: TimeSpanSetParse => parse.toTimeSpanSet
- })
- } catch {
- case e @ (_: UnsupportedOperationException |
- _: DateTimeException) =>
- Failure(e)
- }
- }
-
- // if there all TemporalParses had semantic errors, fail
- val temporals = temporalTries.collect { case Success(temporal) =>
- temporal
- }
-
- if (temporals.isEmpty) {
- temporalTries.collect { case Failure(e) => Failure(e) }.head
- }
- // otherwise, sort the Temporals by the heuristic
- else {
- // val timeMLs =
- // for (temporal <- temporals)
- // yield temporal.timeMLValue
- // val forPrints = temporals zip timeMLs
- // println("TimeML Value \t Raw Parse")
- // forPrints.foreach { case (temporal, timeML) =>
- // println(s"$timeML \t $temporal")
- // }
-
- val finalAnswers = temporals.toSeq.sorted(this.heuristicFor(anchor))
- if (parses.size > 2){
- val finalAnswer = finalAnswers.head
- val finalMLValue = finalAnswer.timeMLValue
- this.logger.warning(
- s"final answer for the parses $finalMLValue"
- )
- }
- Success(finalAnswers)
- }
-
- }
- }
-
- // a heuristic for selecting between ambiguous parses
- private def heuristicFor(anchor: TimeSpan): Ordering[Temporal] = {
- val isQuarter = (timeSpan: TimeSpan) =>
- timeSpan.period.unitAmounts.keySet == Set(QUARTER_YEARS)
- val anchorIsQuarter = isQuarter(anchor)
- Ordering.fromLessThan[Temporal] {
- // prefer time spans to periods
- case (period: Period, timeSpan: TimeSpan) =>
- false
- case (timeSpan: TimeSpan, period: Period) =>
- true
- // if the anchor is in quarters, prefer a result in quarters
- // otherwise, prefer earlier time spans
- case (timeSpan1: TimeSpan, timeSpan2: TimeSpan) =>
- (anchorIsQuarter, isQuarter(timeSpan1), isQuarter(timeSpan2)) match {
- case (true, true, false) =>
- true
- case (true, false, true) =>
- false
- case _ =>
- val span1Dist = (anchor.start.toEpochSecond() - timeSpan1.start
- .toEpochSecond()).abs
- val span2Dist = (anchor.start.toEpochSecond() - timeSpan2.start
- .toEpochSecond()).abs
- span1Dist < span2Dist
- }
- // throw an exception for anything else
- case other =>
- throw new UnsupportedOperationException(
- "Don't know how to order " + other
- )
- }
- }
-}
-
-/** Splits a string into tokens to be used as input for the synchronous parser.
- *
- * This tokenizer is appropriate for the default English grammar.
- */
-object DefaultTokenizer extends (String => IndexedSeq[String]) {
- final val wordBoundary = "\\b".r
- final val letterNonLetterBoundary =
- "(?<=[^\\p{L}])(?=[\\p{L}])|(?<=[\\p{L}])(?=[^\\p{L}])".r
-
- def apply(sourceText: String): IndexedSeq[String] = {
- val tokens =
- for (untrimmedWord <- this.wordBoundary.split(sourceText).toIndexedSeq)
- yield {
- val word = untrimmedWord.trim
- if (word.isEmpty) {
- IndexedSeq.empty[String]
- }
- // special case for concatenated YYYYMMDD
- else if (word.matches("^\\d{8}$")) {
- IndexedSeq(
- word.substring(0, 4),
- "-",
- word.substring(4, 6),
- "-",
- word.substring(6, 8)
- )
- }
- // special case for concatenated YYMMDD
- else if (word.matches("^\\d{6}$")) {
- IndexedSeq(
- word.substring(0, 2),
- "-",
- word.substring(2, 4),
- "-",
- word.substring(4, 6)
- )
- }
- // special case for concatenated HHMMTZ
- else if (word.matches("^\\d{4}[A-Z]{3,4}$")) {
- IndexedSeq(
- word.substring(0, 2),
- ":",
- word.substring(2, 4),
- word.substring(4).toLowerCase
- )
- }
- // otherwise, split at all letter/non-letter boundaries
- else {
- this.letterNonLetterBoundary
- .split(word)
- .toIndexedSeq
- .map(_.trim.toLowerCase)
- .filterNot(_.isEmpty)
- }
- }
- tokens.flatten
- }
-}
-
-object ItalianTokenizer extends (String => IndexedSeq[String]) {
-
- def apply(sourceText: String): IndexedSeq[String] = {
- val cleanedText = Normalizer
- .normalize(sourceText, Normalizer.Form.NFD)
- .replaceAll("\\p{InCombiningDiacriticalMarks}+", "")
- val tokens =
- for (
- untrimmedWord <- DefaultTokenizer.wordBoundary
- .split(cleanedText)
- .toIndexedSeq
- ) yield {
- val word = untrimmedWord.trim
- if (word.isEmpty) {
- IndexedSeq.empty[String]
- }
- // special case for concatenated YYYYMMDD
- else if (word.matches("^\\d{8}$")) {
- IndexedSeq(
- word.substring(0, 4),
- "-",
- word.substring(4, 6),
- "-",
- word.substring(6, 8)
- )
- }
- // special case for concatenated YYMMDD
- else if (word.matches("^\\d{6}$")) {
- IndexedSeq(
- word.substring(0, 2),
- "-",
- word.substring(2, 4),
- "-",
- word.substring(4, 6)
- )
- }
- // special case for concatenated HHMMTZ
- else if (word.matches("^\\d{4}[A-Z]{3,4}$")) {
- IndexedSeq(
- word.substring(0, 2),
- ":",
- word.substring(2, 4),
- word.substring(4).toLowerCase
- )
- }
- // special case for numbers in Italian
- else if (
- word.matches("^[Dd]ue(cen|mil)\\w+$") || word
- .matches("^[Tt]re(cen|mil)\\w+$") ||
- word.matches("^[Qq]uattro(cen|mil)\\w+$") || word
- .matches("^[Cc]inque(cen|mil)\\w+$") ||
- word.matches("^[Ss]ei(cen|mil)\\w+$") || word
- .matches("^[Ss]ette(cen|mil)\\w+$") ||
- word.matches("^[Oo]tto(cen|mil)\\w+$") || word
- .matches("^[Nn]ove(cen|mil)\\w+$") ||
- word.matches("^[Dd]ieci(mil)\\w+$") ||
- word.matches("^[Cc]ento\\w+$") || word.matches("^[Mm]ille\\w+$") ||
- word.matches("^[Vv]ent\\w+$") || word.matches("^[Tt]rent\\w+$") ||
- word.matches("^[Qq]uarant\\w+$") || word
- .matches("^[Cc]inquant\\w+$") ||
- word.matches("^[Ss]essant\\w+$") || word
- .matches("^[Ss]ettant\\w+$") ||
- word.matches("^[Oo]ttant\\w+$") || word.matches("^[Nn]ovant\\w+$") ||
- word.matches("^[Dd]eci\\w+$") || word.matches("^[Uu]ndici\\w+$") ||
- word.matches("^[Dd]odici\\w+$") || word.matches("^[Tt]redici\\w+$") ||
- word.matches("^[Qq]uattordici\\w+$") || word
- .matches("^[Qq]uindici\\w+$") ||
- word.matches("^[Ss]edici\\w+$") || word
- .matches("^[Dd]iciassette\\w+$") ||
- word.matches("^[Dd]iciotto\\w+$") || word
- .matches("^[Dd]iciannove\\w+$")
- ) {
- this.tokenizeItalianNumber(word)
- } else if (
- word.matches("^[Dd]eg?l?i?$") || word.matches("^[Dd]ell[oae]$")
- ) {
- IndexedSeq("dell")
- } else if (
- word.matches("^[Aa]g?l?i?$") || word.matches("^[Aa]ll[oae]$")
- ) {
- IndexedSeq("all")
- } else if (word.matches("^([Ii]|[Gg]li|[Ll]e)$")) { // definite plural
- IndexedSeq("le")
- } else if (word.matches("^[Qq]uest[oaei]$")) {
- IndexedSeq("quest")
- } else if (
- word.matches("^[Qq]ueg?l?i?$") || word.matches("^[Qq]uell[oaei]$")
- ) {
- IndexedSeq("quell")
- } else if (word.matches("^[Ss]cors[oaie]$")) {
- IndexedSeq("scorsx")
- } else if (word.matches("^[Pp]assat[oaie]$")) {
- IndexedSeq("passatx")
- } else if (word.matches("^[Uu]ltim[oaie]$")) {
- IndexedSeq("ultimx")
- } else if (word.matches("^[Pp]recedent[ei]$")) {
- IndexedSeq("precedentx")
- } else if (word.matches("^[Pp]rossim[oaie]$")) {
- IndexedSeq("prossimx")
- } else if (word.matches("^[Ss]uccessiv[oaie]$")) {
- IndexedSeq("successivx")
- } else if (word.matches("^[Ss]eguent[ei]$")) {
- IndexedSeq("seguentx")
- } else if (word.matches("^[Ee]ntrant[ei]$")) {
- IndexedSeq("entrantx")
- } else if (word.matches("^[Vv]entur[oaie]$")) {
- IndexedSeq("venturx")
- } else if (word.matches("^[Ff]utur[oaie]$")) {
- IndexedSeq("futurx")
- } else if (word.matches("^[Tt]utt[ie]$")) {
- IndexedSeq("tutti")
- }
- // otherwise, split at all letter/non-letter boundaries
- else {
- DefaultTokenizer.letterNonLetterBoundary
- .split(word)
- .toIndexedSeq
- .map(_.trim.toLowerCase)
- .filterNot(_.isEmpty)
- }
- }
- tokens.flatten
- }
-
- protected def tokenizeItalianNumber(word: String): Seq[String] = {
- if (word.isEmpty) {
- IndexedSeq.empty[String]
- } else if (word.matches("^[Dd]ue(cen|mil)\\w+$")) {
- IndexedSeq("due") ++ this.tokenizeItalianNumber(
- word.substring(3).toLowerCase
- )
- } else if (word.matches("^[Tt]re(cen|mil)\\w+$")) {
- IndexedSeq("tre") ++ this.tokenizeItalianNumber(
- word.substring(3).toLowerCase
- )
- } else if (word.matches("^[Qq]uattro(cen|mil)\\w+$")) {
- IndexedSeq("quattro") ++ this.tokenizeItalianNumber(
- word.substring(7).toLowerCase
- )
- } else if (word.matches("^[Cc]inque(cen|mil)\\w+$")) {
- IndexedSeq("cinque") ++ this.tokenizeItalianNumber(
- word.substring(6).toLowerCase
- )
- } else if (word.matches("^[Ss]ei(cen|mil)\\w+$")) {
- IndexedSeq("sei") ++ this.tokenizeItalianNumber(
- word.substring(3).toLowerCase
- )
- } else if (word.matches("^[Ss]ette(cen|mil)\\w+$")) {
- IndexedSeq("sette") ++ this.tokenizeItalianNumber(
- word.substring(5).toLowerCase
- )
- } else if (word.matches("^[Oo]tto(cen|mil)\\w+$")) {
- IndexedSeq("otto") ++ this.tokenizeItalianNumber(
- word.substring(4).toLowerCase
- )
- } else if (word.matches("^[Nn]ove(cen|mil)\\w+$")) {
- IndexedSeq("nove") ++ this.tokenizeItalianNumber(
- word.substring(4).toLowerCase
- )
- } else if (word.matches("^[Cc]ento\\w+$")) {
- IndexedSeq("cento") ++ this.tokenizeItalianNumber(
- word.substring(5).toLowerCase
- )
- } else if (word.matches("^[Mm]ille\\w+$")) {
- IndexedSeq("mille") ++ this.tokenizeItalianNumber(
- word.substring(5).toLowerCase
- )
- } else if (word.matches("^[Mm]ila\\w+$")) {
- IndexedSeq("mila") ++ this.tokenizeItalianNumber(
- word.substring(4).toLowerCase
- )
- } else if (word.matches("^[Vv]ent\\w+$")) {
- if (word.matches("^[Vv]enti\\w*$")) {
- IndexedSeq("venti") ++ this.tokenizeItalianNumber(
- word.substring(5).toLowerCase
- )
- } else if (word.matches("^[Vv]entennale$")) {
- IndexedSeq("ventennale")
- } else {
- IndexedSeq("vent") ++ this.tokenizeItalianNumber(
- word.substring(4).toLowerCase
- )
- }
- } else if (word.matches("^[Tt]rent\\w+$")) {
- if (word.matches("^[Tt]renta\\w*$")) {
- IndexedSeq("trenta") ++ this.tokenizeItalianNumber(
- word.substring(6).toLowerCase
- )
- } else {
- IndexedSeq("trent") ++ this.tokenizeItalianNumber(
- word.substring(5).toLowerCase
- )
- }
- } else if (word.matches("^[Qq]uarant\\w+$")) {
- if (word.matches("^[Qq]uaranta\\w*$")) {
- IndexedSeq("quaranta") ++ this.tokenizeItalianNumber(
- word.substring(8).toLowerCase
- )
- } else {
- IndexedSeq("quarant") ++ this.tokenizeItalianNumber(
- word.substring(7).toLowerCase
- )
- }
- } else if (word.matches("^[Cc]inquant\\w+$")) {
- if (word.matches("^[Cc]inquanta\\w*$")) {
- IndexedSeq("cinquanta") ++ this.tokenizeItalianNumber(
- word.substring(9).toLowerCase
- )
- } else {
- IndexedSeq("cinquant") ++ this.tokenizeItalianNumber(
- word.substring(8).toLowerCase
- )
- }
- } else if (word.matches("^[Ss]essant\\w+$")) {
- if (word.matches("^[Ss]essanta\\w*$")) {
- IndexedSeq("sessanta") ++ this.tokenizeItalianNumber(
- word.substring(8).toLowerCase
- )
- } else {
- IndexedSeq("sessant") ++ this.tokenizeItalianNumber(
- word.substring(7).toLowerCase
- )
- }
- } else if (word.matches("^[Ss]ettant\\w+$")) {
- if (word.matches("^[Ss]ettanta\\w*$")) {
- IndexedSeq("settanta") ++ this.tokenizeItalianNumber(
- word.substring(8).toLowerCase
- )
- } else {
- IndexedSeq("settant") ++ this.tokenizeItalianNumber(
- word.substring(7).toLowerCase
- )
- }
- } else if (word.matches("^[Oo]ttant\\w+$")) {
- if (word.matches("^[Oo]ttanta\\w*$")) {
- IndexedSeq("ottanta") ++ this.tokenizeItalianNumber(
- word.substring(7).toLowerCase
- )
- } else {
- IndexedSeq("ottant") ++ this.tokenizeItalianNumber(
- word.substring(6).toLowerCase
- )
- }
- } else if (word.matches("^[Nn]ovant\\w+$")) {
- if (word.matches("^[Nn]ovanta\\w*$")) {
- IndexedSeq("novanta") ++ this.tokenizeItalianNumber(
- word.substring(7).toLowerCase
- )
- } else {
- IndexedSeq("novant") ++ this.tokenizeItalianNumber(
- word.substring(6).toLowerCase
- )
- }
- } else if (word.matches("^[Dd]ecina$")) {
- IndexedSeq("dieci", "na")
- } else if (word.matches("^[Uu]ndicina$")) {
- IndexedSeq("undici", "na")
- } else if (word.matches("^[Dd]odicina$")) {
- IndexedSeq("dodici", "na")
- } else if (word.matches("^[Tt]redicina$")) {
- IndexedSeq("tredici", "na")
- } else if (word.matches("^[Qq]uattordicina$")) {
- IndexedSeq("quattordici", "na")
- } else if (word.matches("^[Qq]uindicina$")) {
- IndexedSeq("quindici", "na")
- } else if (word.matches("^[Ss]edicina$")) {
- IndexedSeq("sedici", "na")
- } else if (word.matches("^[Dd]icissettena$")) {
- IndexedSeq("diciassette", "na")
- } else if (word.matches("^[Dd]iciottona$")) {
- IndexedSeq("diciotto", "na")
- } else if (word.matches("^[Dd]iciannovena$")) {
- IndexedSeq("diciannove", "na")
- } else {
- IndexedSeq(word)
- }
- }
-}
diff --git a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/TemporalParse.scala b/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/TemporalParse.scala
deleted file mode 100755
index 0126e87..0000000
--- a/timelines/tweaked-timenorm/src/main/scala/org/clulab/timenorm/scfg/TemporalParse.scala
+++ /dev/null
@@ -1,560 +0,0 @@
-package org.clulab.timenorm.scfg
-
-import java.time.temporal._
-import java.time.{LocalDateTime, ZoneId, ZonedDateTime}
-
-import org.clulab.time._
-import org.clulab.timenorm.scfg.PeriodSet.{Frequency, Quantifier}
-import org.clulab.timenorm.scfg.SynchronousParser.Tree
-
-import scala.collection.immutable.Seq
-
-
-trait TokenParser {
- def toInt(token: String): Int
- def toTemporalUnit(token: String): TemporalUnit
- def toTemporalField(token: String): TemporalField
-}
-
-class DefaultTokenParser extends TokenParser {
- def toInt(token: String): Int = token.toInt
- def toTemporalUnit(token: String): TemporalUnit = token match {
- case "MORNINGS" => MORNINGS
- case "AFTERNOONS" => AFTERNOONS
- case "EVENINGS" => EVENINGS
- case "NIGHTS" => NIGHTS
- case "WEEKENDS" => WEEKENDS
- case "SPRINGS" => SPRINGS
- case "SUMMERS" => SUMMERS
- case "FALLS" => FALLS
- case "WINTERS" => WINTERS
- case "QUARTER_YEARS" => IsoFields.QUARTER_YEARS
- case "WEEK_BASED_YEARS" => IsoFields.WEEK_BASED_YEARS
- case "UNSPECIFIED" => UNSPECIFIED
- case _ => ChronoUnit.valueOf(token)
- }
- def toTemporalField(token: String): TemporalField = token match {
- case "MORNING_OF_DAY" => MORNING_OF_DAY
- case "AFTERNOON_OF_DAY" => AFTERNOON_OF_DAY
- case "EVENING_OF_DAY" => EVENING_OF_DAY
- case "NIGHT_OF_DAY" => NIGHT_OF_DAY
- case "EASTER_DAY_OF_YEAR" => EASTER_DAY_OF_YEAR
- case "DAY_OF_WEEKEND" => DAY_OF_WEEKEND
- case "WEEKEND_OF_WEEK" => WEEKEND_OF_WEEK
- case "DAY_OF_SPRING" => DAY_OF_SPRING
- case "SPRING_OF_YEAR" => SPRING_OF_YEAR
- case "DAY_OF_SUMMER" => DAY_OF_SUMMER
- case "SUMMER_OF_YEAR" => SUMMER_OF_YEAR
- case "DAY_OF_FALL" => DAY_OF_FALL
- case "FALL_OF_YEAR" => FALL_OF_YEAR
- case "DAY_OF_WINTER" => DAY_OF_WINTER
- case "WINTER_OF_YEAR" => WINTER_OF_YEAR
- case "YEAR_OF_DECADE" => YEAR_OF_DECADE
- case "DECADE" => DECADE
- case "DECADE_OF_CENTURY" => DECADE_OF_CENTURY
- case "YEAR_OF_CENTURY" => YEAR_OF_CENTURY
- case "CENTURY" => CENTURY
- // the pattern matcher of 2.10.0 can't handle such big case statements, so break it up a bit
- case token => token match {
- case "DAY_OF_QUARTER" => IsoFields.DAY_OF_QUARTER
- case "QUARTER_OF_YEAR" => IsoFields.QUARTER_OF_YEAR
- case "WEEK_BASED_YEAR" => IsoFields.WEEK_BASED_YEAR
- case "WEEK_OF_WEEK_BASED_YEAR" => IsoFields.WEEK_OF_WEEK_BASED_YEAR
- case _ => ChronoField.valueOf(token)
- }
- }
-}
-object DefaultTokenParser extends DefaultTokenParser
-
-private[timenorm] abstract class CanFail(name: String) {
- private[timenorm] def fail[T](tree: Tree): T = {
- throw new UnsupportedOperationException(
- "Don't know how to parse %s from %s".format(this.name, tree match {
- case tree: Tree.Terminal => tree.token
- case tree: Tree.NonTerminal => tree.rule.symbol + " -> " + tree.children.map {
- case child: Tree.Terminal => child.token
- case child: Tree.NonTerminal => child.rule.symbol
- }.mkString(" ")
- }))
- }
-}
-
-sealed abstract class TemporalParse
-
-object TemporalParse extends CanFail("[Temporal]") with (Tree => TemporalParse) {
-
- def apply(tree: Tree): TemporalParse = {
- this.applyNoImplicit(tree, DefaultTokenParser)
- }
-
- def apply(tree: Tree)(implicit tokenParser: TokenParser): TemporalParse = {
- this.applyNoImplicit(tree, tokenParser)
- }
-
- private def applyNoImplicit(tree: Tree, tokenParser: TokenParser): TemporalParse = {
- implicit val parser = tokenParser
- tree match {
- case tree: Tree.NonTerminal => tree.rule.basicSymbol match {
- case "[Period]" => PeriodParse(tree)
- case "[PeriodSet]" => PeriodSetParse(tree)
- case "[TimeSpan]" => TimeSpanParse(tree)
- case "[TimeSpanSet]" => TimeSpanSetParse(tree)
- case _ => fail(tree)
- }
- case _ => fail(tree)
- }
- }
-}
-
-case class IntParse(value: Int)
-object IntParse extends CanFail("[Int]") {
- def apply(tree: Tree)(implicit tokenParser: TokenParser): IntParse = tree match {
- case Tree.Terminal(number) =>
- IntParse(tokenParser.toInt(number))
- case tree =>
- val number = this.toDigits(tree).reverse.zipWithIndex.foldLeft(0){
- case (sum, (digit, index)) => sum + digit * math.pow(10, index).toInt
- }
- IntParse(number)
- }
-
- private def toDigits(tree: Tree): List[Int] = tree match {
- case Tree.Terminal(number) =>
- number.toInt :: Nil
- case Tree.NonTerminal(rule, children) if rule.basicSymbol == "[Int]" =>
- children.flatMap(this.toDigits)
- case _ => fail(tree)
- }
-}
-
-case class UnitParse(value: TemporalUnit)
-object UnitParse extends CanFail("[Unit]") {
- def apply(tree: Tree)(implicit tokenParser: TokenParser): UnitParse = tree match {
- case Tree.Terminal(unit) =>
- UnitParse(tokenParser.toTemporalUnit(unit))
- case Tree.NonTerminal(rule, tree :: Nil) if rule.basicSymbol == "[Unit]" =>
- UnitParse(tree)
- case _ => fail(tree)
- }
-}
-
-case class FieldValueParse(fieldValues: Map[TemporalField, Int]) {
- for ((field, value) <- fieldValues; if !field.range().isValidValue(value))
- throw new UnsupportedOperationException("field %s cannot have value %s".format(field, value))
-}
-object FieldValueParse extends CanFail("[FieldValue]") {
- def apply(tree: Tree)(implicit tokenParser: TokenParser): FieldValueParse = tree match {
- case tree: Tree.NonTerminal if tree.rule.basicSymbol == "[FieldValue]" => tree.children match {
- case Tree.Terminal(field) :: number :: Nil =>
- FieldValueParse(Map(tokenParser.toTemporalField(field) -> IntParse(number).value))
- case children =>
- FieldValueParse(children.map(FieldValueParse.apply).map(_.fieldValues).flatten.toMap)
- }
- case _ => fail(tree)
- }
-}
-
-sealed abstract class PeriodParse extends TemporalParse {
- def toPeriod: Period
-}
-
-object PeriodParse extends CanFail("[Period]") {
-
- def apply(tree: Tree)(implicit tokenParser: TokenParser): PeriodParse = tree match {
- case tree: Tree.Terminal =>
- Simple(1, UnitParse(tree).value)
- case tree: Tree.NonTerminal if tree.rule.basicSymbol == "[Period]" => tree.children match {
- case tree :: Nil =>
- PeriodParse(tree)
- case Tree.Terminal("Simple") :: unit :: Nil =>
- Simple(1, UnitParse(unit).value)
- case Tree.Terminal("Simple") :: amount :: unit :: Nil =>
- Simple(IntParse(amount).value, UnitParse(unit).value)
- case Tree.Terminal("Unspecified") :: unit :: Nil =>
- Unspecified(UnitParse(unit).value)
- case Tree.Terminal("Fractional") :: numerator :: denominator :: unit :: Nil =>
- Fractional(IntParse(numerator).value, IntParse(denominator).value, UnitParse(unit).value)
- case Tree.Terminal("Fractional") :: whole :: numerator :: denominator :: unit :: Nil =>
- val denominatorValue = IntParse(denominator).value
- Fractional(IntParse(whole).value * denominatorValue + IntParse(numerator).value, denominatorValue, UnitParse(unit).value)
- case Tree.Terminal("Sum") :: children =>
- Sum(children.map(PeriodParse.apply))
- case Tree.Terminal("WithModifier") :: period :: Tree.Terminal(modifier) :: Nil =>
- WithModifier(PeriodParse(period), Modifier.valueOf(modifier))
- case _ => fail(tree)
- }
- case _ => fail(tree)
- }
-
- case class Simple(amount: Int, unit: TemporalUnit) extends PeriodParse {
- def toPeriod = Period(Map(unit -> amount), Modifier.Exact)
- }
-
- case class Unspecified(unit: TemporalUnit) extends PeriodParse {
- def toPeriod = Period(Map(unit -> Int.MaxValue), Modifier.Exact)
- }
-
- case class Fractional(numerator: Int, denominator: Int, unit: TemporalUnit) extends PeriodParse {
- def toPeriod = Period.fromFractional(numerator, denominator, unit, Modifier.Exact)
- }
-
- case class Sum(periods: Seq[PeriodParse]) extends PeriodParse {
- def toPeriod = periods.foldLeft(Period.empty)(_ + _.toPeriod)
- }
-
- case class WithModifier(period: PeriodParse, modifier: Modifier) extends PeriodParse {
- def toPeriod = period.toPeriod.copy(modifier = modifier)
- }
-}
-
-sealed abstract class PeriodSetParse extends TemporalParse {
- def toPeriodSet: PeriodSet
-}
-
-object PeriodSetParse extends CanFail("[PeriodSet]") {
-
- def apply(tree: Tree)(implicit tokenParser: TokenParser): PeriodSetParse = tree match {
- case period: Tree.NonTerminal if period.rule.basicSymbol == "[Period]" =>
- Simple(PeriodParse(period))
- case tree: Tree.NonTerminal if tree.rule.basicSymbol == "[PeriodSet]" => tree.children match {
- case tree :: Nil =>
- PeriodSetParse(tree)
- case Tree.Terminal("Simple") :: period :: Nil =>
- Simple(PeriodParse(period))
- case Tree.Terminal("WithModifier") :: period :: Tree.Terminal(modifier) :: Nil =>
- WithModifier(PeriodSetParse(period), Modifier.valueOf(modifier))
- case Tree.Terminal("WithQuantifier") :: period :: Tree.Terminal(quantifier) :: Nil =>
- WithQuantifier(PeriodSetParse(period), Quantifier.valueOf(quantifier))
- case Tree.Terminal("WithFrequency") :: period :: times :: Nil =>
- WithFrequency(PeriodSetParse(period), Frequency(IntParse(times).value))
- case Tree.Terminal("WithFrequency") :: period :: times :: unit :: Nil =>
- WithFrequency(PeriodSetParse(period), Frequency(IntParse(times).value, Some(UnitParse(unit).value)))
- case _ => fail(tree)
- }
- case _ => fail(tree)
- }
-
- case class Simple(periodParse: PeriodParse) extends PeriodSetParse {
- def toPeriodSet = PeriodSet(periodParse.toPeriod, Modifier.Exact)
- }
-
- case class WithModifier(periodSet: PeriodSetParse, modifier: Modifier) extends PeriodSetParse {
- def toPeriodSet = periodSet.toPeriodSet.copy(modifier = modifier)
- }
-
- case class WithQuantifier(periodSet: PeriodSetParse, quantifier: Quantifier) extends PeriodSetParse {
- def toPeriodSet = periodSet.toPeriodSet.copy(quantifier = quantifier)
- }
-
- case class WithFrequency(periodSet: PeriodSetParse, frequency: Frequency) extends PeriodSetParse {
- def toPeriodSet = periodSet.toPeriodSet.copy(frequency = frequency)
- }
-}
-
-sealed abstract class TimeSpanParse extends TemporalParse {
- def toTimeSpan(anchor: TimeSpan): TimeSpan
-}
-
-object TimeSpanParse extends CanFail("[TimeSpan]") {
-
- def apply(tree: Tree)(implicit tokenParser: TokenParser): TimeSpanParse = tree match {
- case Tree.Terminal("PAST") =>
- Past
- case Tree.Terminal("PRESENT") =>
- Present
- case Tree.Terminal("FUTURE") =>
- Future
- case tree: Tree.NonTerminal if tree.rule.basicSymbol == "[TimeSpan]" => tree.children match {
- case tree :: Nil =>
- TimeSpanParse(tree)
- case Tree.Terminal("Simple") :: (tree: Tree.Terminal) :: Nil =>
- TimeSpanParse(tree)
- case Tree.Terminal("FindAbsolute") :: tree :: Nil =>
- FindAbsolute(FieldValueParse(tree).fieldValues)
- case Tree.Terminal("FindEarlier") :: time :: fields :: Nil =>
- FindEarlier(TimeSpanParse(time), FieldValueParse(fields).fieldValues)
- case Tree.Terminal("FindAtOrEarlier") :: time :: fields :: Nil =>
- FindAtOrEarlier(TimeSpanParse(time), FieldValueParse(fields).fieldValues)
- case Tree.Terminal("FindLater") :: time :: fields :: Nil =>
- FindLater(TimeSpanParse(time), FieldValueParse(fields).fieldValues)
- case Tree.Terminal("FindAtOrLater") :: time :: fields :: Nil =>
- FindAtOrLater(TimeSpanParse(time), FieldValueParse(fields).fieldValues)
- case Tree.Terminal("FindEnclosing") :: time :: (periodTree: Tree.NonTerminal) :: Nil if periodTree.rule.basicSymbol == "[Period]" =>
- val unit = PeriodParse(periodTree).toPeriod.unitAmounts.keySet.maxBy(_.getDuration())
- FindEnclosing(TimeSpanParse(time), unit)
- case Tree.Terminal("FindEnclosing") :: time :: (fieldTree: Tree.NonTerminal) :: Nil if fieldTree.rule.basicSymbol == "[FieldValue]" =>
- val unit = FieldValueParse(fieldTree).fieldValues.keySet.map(_.getRangeUnit()).maxBy(_.getDuration())
- FindEnclosing(TimeSpanParse(time), unit)
- case Tree.Terminal("FindEnclosing") :: time :: unit :: Nil =>
- FindEnclosing(TimeSpanParse(time), UnitParse(unit).value)
- case Tree.Terminal("FindEnclosed") :: time :: fields :: Nil =>
- FindEnclosed(TimeSpanParse(time), FieldValueParse(fields).fieldValues)
- case Tree.Terminal("StartAtStartOf") :: time :: period :: Nil =>
- StartAtStartOf(TimeSpanParse(time), PeriodParse(period))
- case Tree.Terminal("StartAtEndOf") :: time :: period :: Nil =>
- StartAtEndOf(TimeSpanParse(time), PeriodParse(period))
- case Tree.Terminal("EndAtStartOf") :: time :: period :: Nil =>
- EndAtStartOf(TimeSpanParse(time), PeriodParse(period))
- case Tree.Terminal("MoveEarlier") :: time :: period :: Nil =>
- MoveEarlier(TimeSpanParse(time), PeriodParse(period))
- case Tree.Terminal("MoveLater") :: time :: period :: Nil =>
- MoveLater(TimeSpanParse(time), PeriodParse(period))
- case Tree.Terminal("WithModifier") :: time :: Tree.Terminal(modifier) :: Nil =>
- WithModifier(TimeSpanParse(time), Modifier.valueOf(modifier))
- case _ => fail(tree)
- }
- case _ => fail(tree)
- }
-
- case object Past extends TimeSpanParse {
- def toTimeSpan(anchor: TimeSpan) = {
- new TimeSpan(TimeSpan.unspecifiedStart, anchor.start, Period.unspecified, Modifier.Approx) {
- override def timeMLValueOption = Some("PAST_REF")
- }
- }
- }
-
- case object Present extends TimeSpanParse {
- def toTimeSpan(anchor: TimeSpan) = {
- new TimeSpan(anchor.start, anchor.end, anchor.period, anchor.modifier) {
- override def timeMLValueOption = Some("PRESENT_REF")
- }
- }
- }
-
- /** Parse result standing for "the future", resolved relative to an anchor span. */
- case object Future extends TimeSpanParse {
-   /**
-    * Builds the span that runs from the anchor's end to `TimeSpan.unspecifiedEnd`.
-    *
-    * The span carries `Period.unspecified` and `Modifier.Approx`, and reports "FUTURE_REF" as
-    * its TimeML value.
-    */
-   def toTimeSpan(anchor: TimeSpan) =
-     new TimeSpan(anchor.end, TimeSpan.unspecifiedEnd, Period.unspecified, Modifier.Approx) {
-       override def timeMLValueOption = Some("FUTURE_REF")
-     }
- }
-
- /**
-  * Base class for time span parses that are described by a set of field values.
-  *
-  * @param fields the required value for each temporal field
-  */
- abstract class FieldBasedTimeSpanParse(fields: Map[TemporalField, Int]) extends TimeSpanParse {
-   /**
-    * The base unit with the shortest duration among the base units of all fields.
-    * Computed eagerly; `minBy` throws if `fields` is empty.
-    */
-   val minUnit = {
-     val baseUnits = fields.keySet.map(_.getBaseUnit)
-     baseUnits.minBy(_.getDuration)
-   }
- }
-
- /**
-  * A time span fully determined by its field values, independent of any anchor.
-  *
-  * @param fields the value each temporal field must take
-  */
- case class FindAbsolute(fields: Map[TemporalField, Int]) extends FieldBasedTimeSpanParse(fields) {
-   /** The (field, value) pairs ordered from the longest range unit down to the shortest. */
-   val fieldsLargeToSmall = {
-     val smallToLarge = fields.toSeq.sortBy { case (field, _) => field.getRangeUnit.getDuration }
-     smallToLarge.reverse
-   }
-
-   /**
-    * Starts from 0001-01-01T00:00 in zone "Z", applies every field value (largest range first),
-    * truncates the result to `minUnit`, and returns the exact span of one `minUnit` starting there.
-    */
-   def toTimeSpan: TimeSpan = {
-     val origin = ZonedDateTime.of(LocalDateTime.of(1, 1, 1, 0, 0), ZoneId.of("Z"))
-     val withFields = this.fieldsLargeToSmall.foldLeft(origin) { (time, fieldValue) =>
-       time.`with`(fieldValue._1, fieldValue._2)
-     }
-     val oneMinUnit = Period(Map(this.minUnit -> 1), Modifier.Exact)
-     val spanStart = TimeSpan.truncate(withFields, this.minUnit)
-     TimeSpan.startingAt(spanStart, oneMinUnit, Modifier.Exact)
-   }
-
-   /** The anchor is ignored: the result is the same as the parameterless `toTimeSpan`. */
-   def toTimeSpan(anchor: TimeSpan) = toTimeSpan
- }
-
- /**
-  * Base class for parses that walk through time in one direction until they reach a span
-  * whose fields have the requested values.
-  *
-  * The search has no bound of its own: it continues until a candidate is accepted or one of
-  * the supplied functions throws.
-  *
-  * @param timeSpanParse parse of the span the search is relative to
-  * @param fields the value each temporal field must take in the result
-  * @param getStart picks the instant of the relative span where the search begins
-  * @param step moves an instant by one of the given unit in the search direction
-  * @param isAcceptable decides, given (relative span, candidate span), whether the candidate
-  *        may be returned
-  */
- abstract class DirectedFieldSearchingTimeSpanParse(
-     timeSpanParse: TimeSpanParse,
-     fields: Map[TemporalField, Int],
-     getStart: TimeSpan => ZonedDateTime,
-     step: (ZonedDateTime, TemporalUnit) => ZonedDateTime,
-     isAcceptable: (TimeSpan, TimeSpan) => Boolean) extends FieldBasedTimeSpanParse(fields) {
-
-   /** The field whose base unit has the shortest duration; it drives the step size. */
-   val searchField = this.fields.keySet.minBy(_.getBaseUnit.getDuration)
-
-   /** Exact period of one `minUnit`; the length of every span this search produces. */
-   val period = Period(Map(this.minUnit -> 1), Modifier.Exact)
-
-   /**
-    * Resolves the relative span against `anchor`, then steps from `getStart` of that span
-    * until a candidate instant yields an acceptable span, which is returned.
-    */
-   def toTimeSpan(anchor: TimeSpan) = {
-     val relativeSpan = this.timeSpanParse.toTimeSpan(anchor)
-
-     // search by base units for partial ranges (e.g. search by hours, not "mornings")
-     val initialUnit = this.searchField.getBaseUnit match {
-       case partialRange: PartialRange => partialRange.field.getBaseUnit
-       case unit => unit
-     }
-
-     // if the field's range is fixed and the range unit is not estimated,
-     // then we can move by range units once we satisfy the base unit
-     val rangeStepsAllowed = this.searchField.range().isFixed() &&
-       !this.searchField.getRangeUnit().isDurationEstimated()
-
-     // Checks the candidate first; on a miss, steps once, possibly upgrades the step unit
-     // (at most one time), and tries again.
-     @scala.annotation.tailrec
-     def search(candidate: ZonedDateTime, stepUnit: TemporalUnit, maySwitch: Boolean): TimeSpan =
-       this.tryToCreateTimeSpan(relativeSpan, candidate) match {
-         case Some(found) => found
-         case None =>
-           val next = step(candidate, stepUnit)
-           // once the search field's own value is satisfied, start moving by range units
-           val switchNow = maySwitch && next.get(searchField) == this.fields(searchField)
-           val nextUnit = if (switchNow) searchField.getRangeUnit() else stepUnit
-           search(next, nextUnit, maySwitch && !switchNow)
-       }
-
-     search(getStart(relativeSpan), initialUnit, rangeStepsAllowed)
-   }
-
-   /**
-    * Returns the span of one `period` starting at `start` truncated to `minUnit`, provided that
-    * `start` matches every field value, the truncated start still matches every field value,
-    * and `isAcceptable(oldTimeSpan, span)` holds; otherwise `None`.
-    */
-   private def tryToCreateTimeSpan(oldTimeSpan: TimeSpan, start: ZonedDateTime): Option[TimeSpan] = {
-     def matchesAllFields(time: ZonedDateTime) =
-       this.fields.forall { case (field, value) => time.get(field) == value }
-
-     Some(start)
-       // must match all field values
-       .filter(matchesAllFields)
-       // must still match all field values after truncation
-       .map(time => TimeSpan.truncate(time, this.minUnit))
-       .filter(matchesAllFields)
-       // must satisfy any acceptance criteria
-       .map(truncated => TimeSpan.startingAt(truncated, this.period, Modifier.Exact))
-       .filter(span => this.isAcceptable(oldTimeSpan, span))
-   }
- }
-
- /**
-  * Searches backwards from the end of the parsed span, one unit at a time, for a span with
-  * the given field values that both starts before the parsed span's start and ends before
-  * its end.
-  */
- case class FindEarlier(timeSpanParse: TimeSpanParse, fields: Map[TemporalField, Int])
-     extends DirectedFieldSearchingTimeSpanParse(
-       timeSpanParse,
-       fields,
-       span => span.end,
-       (time, unit) => time.minus(1, unit),
-       (reference, candidate) =>
-         candidate.start.isBefore(reference.start) && candidate.end.isBefore(reference.end))
-
- /**
-  * Searches backwards from the end of the parsed span, one unit at a time, for a span with
-  * the given field values. A candidate is rejected only when it both starts after the parsed
-  * span's start and ends after its end.
-  */
- case class FindAtOrEarlier(timeSpanParse: TimeSpanParse, fields: Map[TemporalField, Int])
-     extends DirectedFieldSearchingTimeSpanParse(
-       timeSpanParse,
-       fields,
-       span => span.end,
-       (time, unit) => time.minus(1, unit),
-       (reference, candidate) =>
-         !(candidate.start.isAfter(reference.start) && candidate.end.isAfter(reference.end)))
-
- /**
-  * Searches forwards from the start of the parsed span, one unit at a time, for a span with
-  * the given field values that both starts after the parsed span's start and ends after
-  * its end.
-  */
- case class FindLater(timeSpanParse: TimeSpanParse, fields: Map[TemporalField, Int])
-     extends DirectedFieldSearchingTimeSpanParse(
-       timeSpanParse,
-       fields,
-       span => span.start,
-       (time, unit) => time.plus(1, unit),
-       (reference, candidate) =>
-         candidate.start.isAfter(reference.start) && candidate.end.isAfter(reference.end))
-
- /**
-  * Searches forwards from the start of the parsed span, one unit at a time, for a span with
-  * the given field values. A candidate is rejected only when it both starts before the parsed
-  * span's start and ends before its end.
-  */
- case class FindAtOrLater(timeSpanParse: TimeSpanParse, fields: Map[TemporalField, Int])
-     extends DirectedFieldSearchingTimeSpanParse(
-       timeSpanParse,
-       fields,
-       span => span.start,
-       (time, unit) => time.plus(1, unit),
-       (reference, candidate) =>
-         !(candidate.start.isBefore(reference.start) && candidate.end.isBefore(reference.end)))
-
- /**
-  * Searches forwards from the start of the parsed span for a span with the given field values
-  * that does not start before the parsed span's start.
-  *
-  * The search is aborted with an `UnsupportedOperationException` as soon as a field-matching
-  * candidate starts at or after the parsed span's end.
-  */
- case class FindEnclosed(timeSpanParse: TimeSpanParse, fields: Map[TemporalField, Int])
-     extends DirectedFieldSearchingTimeSpanParse(
-       timeSpanParse,
-       fields,
-       span => span.start,
-       (time, unit) => time.plus(1, unit),
-       (enclosing, candidate) =>
-         if (candidate.start.isBefore(enclosing.end)) {
-           !candidate.start.isBefore(enclosing.start)
-         } else {
-           // the candidate lies past the enclosing span: nothing can be found inside it
-           throw new UnsupportedOperationException(
-             "%s not found within %s".format(fields, timeSpanParse))
-         })
-
- /**
-  * Finds the span of exactly one `unit` that is positioned around the start of the parsed span.
-  *
-  * @param timeSpanParse parse of the span to be enclosed
-  * @param unit the unit of the enclosing span
-  */
- case class FindEnclosing(timeSpanParse: TimeSpanParse, unit: TemporalUnit) extends TimeSpanParse {
-   /**
-    * Truncates the parsed span's start to `unit` (moving one unit back if truncation landed
-    * after that start) and returns the one-unit span beginning there. The modifier is the
-    * parsed span's modifier combined (`&`) with that of the exact one-unit period.
-    *
-    * Throws `UnsupportedOperationException` if the parsed span's period compares greater
-    * than `unit`.
-    */
-   def toTimeSpan(anchor: TimeSpan) = {
-     val inner = timeSpanParse.toTimeSpan(anchor)
-     if (inner.period > unit) {
-       throw new UnsupportedOperationException("%s is larger than 1 %s".format(timeSpanParse, unit))
-     }
-     val truncated = TimeSpan.truncate(inner.start, unit)
-     val enclosingStart = if (truncated.isAfter(inner.start)) truncated.minus(1, unit) else truncated
-     val oneUnit = Period(Map(unit -> 1), Modifier.Exact)
-     TimeSpan.startingAt(enclosingStart, oneUnit, inner.modifier & oneUnit.modifier)
-   }
- }
-
- /**
-  * Base class for parses that derive a new span from a parsed span and a parsed period.
-  *
-  * @param timeSpanParse parse of the span to start from
-  * @param periodParse parse of the period used to build the new span
-  */
- abstract class MoveSpanParse(timeSpanParse: TimeSpanParse, periodParse: PeriodParse)
-     extends TimeSpanParse {
-
-   /** Builds the result when the period contains an unspecified amount. */
-   def toUnspecifiedTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier): TimeSpan
-
-   /** Builds the result when every amount of the period is specified. */
-   def toTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier): TimeSpan
-
-   /**
-    * Resolves the span and the period, combines their modifiers with `&`, and dispatches to
-    * one of the two builders. A unit amount equal to `Int.MaxValue` is treated as unspecified;
-    * in that case the modifier is additionally combined with `Modifier.Approx`.
-    */
-   def toTimeSpan(anchor: TimeSpan) = {
-     val baseSpan = timeSpanParse.toTimeSpan(anchor)
-     val offset = periodParse.toPeriod
-     val combined = baseSpan.modifier & offset.modifier
-     if (offset.unitAmounts.values.exists(_ == Int.MaxValue))
-       this.toUnspecifiedTimeSpan(baseSpan, offset, combined & Modifier.Approx)
-     else
-       this.toTimeSpan(baseSpan, offset, combined)
-   }
- }
-
- /** A span of the parsed period that begins where the parsed span begins. */
- case class StartAtStartOf(timeSpanParse: TimeSpanParse, periodParse: PeriodParse)
-     extends MoveSpanParse(timeSpanParse, periodParse) {
-
-   /** Runs from the span's start to `TimeSpan.unspecifiedEnd`. */
-   def toUnspecifiedTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan(timeSpan.start, TimeSpan.unspecifiedEnd, period, modifier)
-
-   /** Runs for `period`, starting at the span's start. */
-   def toTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan.startingAt(timeSpan.start, period, modifier)
- }
-
- /** A span of the parsed period that begins where the parsed span ends. */
- case class StartAtEndOf(timeSpanParse: TimeSpanParse, periodParse: PeriodParse)
-     extends MoveSpanParse(timeSpanParse, periodParse) {
-
-   /** Runs from the span's end to `TimeSpan.unspecifiedEnd`. */
-   def toUnspecifiedTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan(timeSpan.end, TimeSpan.unspecifiedEnd, period, modifier)
-
-   /** Runs for `period`, starting at the span's end. */
-   def toTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan.startingAt(timeSpan.end, period, modifier)
- }
-
- /** A span of the parsed period that ends where the parsed span begins. */
- case class EndAtStartOf(timeSpanParse: TimeSpanParse, periodParse: PeriodParse)
-     extends MoveSpanParse(timeSpanParse, periodParse) {
-
-   /** Runs from `TimeSpan.unspecifiedStart` to the span's start. */
-   def toUnspecifiedTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan(TimeSpan.unspecifiedStart, timeSpan.start, period, modifier)
-
-   /** Runs for `period`, ending at the span's start. */
-   def toTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan.endingAt(timeSpan.start, period, modifier)
- }
-
- /** The parsed span shifted back in time by the parsed period. */
- case class MoveEarlier(timeSpanParse: TimeSpanParse, periodParse: PeriodParse)
-     extends MoveSpanParse(timeSpanParse, periodParse) {
-
-   /** Runs from `TimeSpan.unspecifiedStart` to the span's end. */
-   def toUnspecifiedTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan(TimeSpan.unspecifiedStart, timeSpan.end, period, modifier)
-
-   /** Subtracts `period` from both endpoints; the result keeps the original span's period. */
-   def toTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan(
-       period.subtractFrom(timeSpan.start),
-       period.subtractFrom(timeSpan.end),
-       timeSpan.period,
-       modifier)
- }
-
- /** The parsed span shifted forward in time by the parsed period. */
- case class MoveLater(timeSpanParse: TimeSpanParse, periodParse: PeriodParse)
-     extends MoveSpanParse(timeSpanParse, periodParse) {
-
-   /** Runs from the span's start to `TimeSpan.unspecifiedEnd`. */
-   def toUnspecifiedTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan(timeSpan.start, TimeSpan.unspecifiedEnd, period, modifier)
-
-   /** Adds `period` to both endpoints; the result keeps the original span's period. */
-   def toTimeSpan(timeSpan: TimeSpan, period: Period, modifier: Modifier) =
-     TimeSpan(
-       period.addTo(timeSpan.start),
-       period.addTo(timeSpan.end),
-       timeSpan.period,
-       modifier)
- }
-
- /**
-  * The parsed span with its modifier replaced.
-  *
-  * @param timeSpan parse of the span to resolve
-  * @param modifier the modifier the resulting span should carry
-  */
- case class WithModifier(timeSpan: TimeSpanParse, modifier: Modifier) extends TimeSpanParse {
-   /** Resolves the span against `anchor` and returns a copy carrying `modifier`. */
-   def toTimeSpan(anchor: TimeSpan) = {
-     val resolved = timeSpan.toTimeSpan(anchor)
-     resolved.copy(modifier = modifier)
-   }
- }
-}
-
-/** A temporal parse that can be converted to a `TimeSpanSet`. */
-sealed abstract class TimeSpanSetParse
-    extends TemporalParse {
-
-  /** Converts this parse to the `TimeSpanSet` it denotes. */
-  def toTimeSpanSet: TimeSpanSet
-}
-
-/** Builds [[TimeSpanSetParse]] values from "[TimeSpanSet]" parse trees. */
-object TimeSpanSetParse extends CanFail("[TimeSpanSet]") {
-
-  /**
-   * Converts a "[TimeSpanSet]" non-terminal to a parse.
-   *
-   * A single child is converted recursively; a `Simple` terminal followed by one child yields
-   * a [[Simple]] built from that child's field values. Every other shape is passed to `fail`.
-   */
-  def apply(tree: Tree)(implicit tokenParser: TokenParser): TimeSpanSetParse = tree match {
-    case nonTerminal: Tree.NonTerminal if nonTerminal.rule.basicSymbol == "[TimeSpanSet]" =>
-      nonTerminal.children match {
-        case onlyChild :: Nil =>
-          TimeSpanSetParse(onlyChild)
-        case Tree.Terminal("Simple") :: fieldsTree :: Nil =>
-          Simple(FieldValueParse(fieldsTree).fieldValues)
-        case _ =>
-          fail(tree)
-      }
-    case _ =>
-      fail(tree)
-  }
-
-  /**
-   * A time span set described only by field values.
-   *
-   * @param fields the value each temporal field must take
-   */
-  case class Simple(fields: Map[TemporalField, Int]) extends TimeSpanSetParse {
-    /** Builds the `TimeSpanSet` for these field values. */
-    def toTimeSpanSet = TimeSpanSet(fields)
-  }
-}