diff --git a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BraunBlanquetMetric.java b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BraunBlanquetMetric.java new file mode 100644 index 00000000..dd55578d --- /dev/null +++ b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/BraunBlanquetMetric.java @@ -0,0 +1,98 @@ +package org.apache.ctakes.ytex.kernel.metric; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Compute the Dice metric + * + * + * Described in: + * https://www.sciencedirect.com/science/article/pii/S1532046411000645 Tbl 3, eqn 11 + * + * @author painter + * + */ +public class BraunBlanquetMetric extends BaseSimilarityMetric { + + private static final Log log = LogFactory.getLog(LinMetric.class); + private boolean intrinsicIC = true; + private boolean validCG = false; + private String rootConcept = simSvc.getConceptGraph().getRoot(); + + public boolean isIntrinsicIC() { + return intrinsicIC; + } + + public void setIntrinsicIC(boolean intrinsicIC) { + this.intrinsicIC = intrinsicIC; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + // don't bother if the concept graph is null + if (!validCG) + return 0d; + + // Compute the IC values for each concept + double ic1 = simSvc.getIC(concept1, this.intrinsicIC); + double ic2 = simSvc.getIC(concept2, this.intrinsicIC); + + // Get the LCS with the lowest IC score + double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo, + this.intrinsicIC); + + // if the corpus IC is 0 and the concept is not the root, then we don't + // have any IC on the concept and can't measure similarity - return 0 + if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1)) + return 0d; + + if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2)) + return 0d; + + // Compute the Braun-Blanquet score + // we just need one of these to be greater than zero + if ( ic1 > 0 || ic2 > 0 ) { + + // max of the individual IC's + double denom = ic1; + if ( ic2 > ic1 ) denom = ic2; + double sim = (lcsIC) / ( denom ); + return sim; + } else { + return 0d; + } + } + + public BraunBlanquetMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) { + super(simSvc); + this.intrinsicIC = intrinsicIC; + this.validCG = simSvc.getConceptGraph() != null; + if (!this.intrinsicIC && validCG) { + rootConcept = simSvc.getConceptGraph().getRoot(); + } + } + +} diff --git a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java index bd8e849d..2cfcc8de 100644 --- a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java +++ b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityService.java @@ -29,10 +29,14 @@ public interface ConceptSimilarityService { public enum SimilarityMetricEnum { - LCH(false, false), INTRINSIC_LCH(true, false), LIN(false, true), INTRINSIC_LIN( - true, false), PATH(false, false), INTRINSIC_PATH(true, false), JACCARD( - true, false), SOKAL(true, false), RADA(false, false), INTRINSIC_RADA( - true, false), WUPALMER(false, false), PAGERANK(false, false); + LCH(false, false), INTRINSIC_LCH(true, false), + LIN(false, true), INTRINSIC_LIN(true, false), PATH(false, false), + INTRINSIC_PATH(true, false), JACCARD(true, false), SOKAL(true, false), + RADA(false, false), INTRINSIC_RADA(true, false), WUPALMER(false, false), + PAGERANK(false, false), RESNIK(false, false), INTRINSIC_RESNIK(true, false), + FAITH(false, false), INTRINSIC_FAITH(true, false), DICE(false, true), + SIMPSON(false, true), BRAUN_BLANQUET(false, true), OCHIAI(false, true); + boolean intrinsicIC = false; boolean corpusIC = false; @@ -201,4 +205,4 @@ public List similarity( Map conceptFilter, boolean lcs); public abstract int getDepth(String concept); -} \ No newline at end of file +} diff --git a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java index 42f70a50..9c84b383 100644 --- a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java +++ b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java @@ -856,6 +856,24 @@ private void initSimilarityMetricMap() { new JaccardMetric(this)); this.similarityMetricMap.put(SimilarityMetricEnum.WUPALMER, new WuPalmerMetric(this)); + this.similarityMetricMap.put(SimilarityMetricEnum.INTRINSIC_RESNIK, + new ResnikMetric(this, true)); + this.similarityMetricMap.put(SimilarityMetricEnum.RESNIK, + new ResnikMetric(this, false)); + this.similarityMetricMap.put(SimilarityMetricEnum.INTRINSIC_FAITH, + new FaithMetric(this, true)); + this.similarityMetricMap.put(SimilarityMetricEnum.FAITH, + new FaithMetric(this, false)); + this.similarityMetricMap.put(SimilarityMetricEnum.DICE, + new DiceMetric(this, true)); + this.similarityMetricMap.put(SimilarityMetricEnum.SIMPSON, + new SimpsonMetric(this, true)); + this.similarityMetricMap.put(SimilarityMetricEnum.BRAUN_BLANQUET, + new BraunBlanquetMetric(this, true)); + this.similarityMetricMap.put(SimilarityMetricEnum.OCHIAI, + new OchiaiMetric(this, true)); + + } else { this.similarityMetricMap.put(SimilarityMetricEnum.PAGERANK, new PageRankMetric(this, this.getPageRankService())); diff --git a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/DiceMetric.java b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/DiceMetric.java new file mode 100644 index 00000000..6bd0d656 --- /dev/null +++ b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/DiceMetric.java @@ -0,0 +1,93 @@ +package org.apache.ctakes.ytex.kernel.metric; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Compute the Dice metric + * + * + * Described in: + * https://www.sciencedirect.com/science/article/pii/S1532046411000645 Tbl 3, eqn 8 + * + * @author painter + * + */ +public class DiceMetric extends BaseSimilarityMetric { + + private static final Log log = LogFactory.getLog(LinMetric.class); + private boolean intrinsicIC = true; + private boolean validCG = false; + private String rootConcept = simSvc.getConceptGraph().getRoot(); + + public boolean isIntrinsicIC() { + return intrinsicIC; + } + + public void setIntrinsicIC(boolean intrinsicIC) { + this.intrinsicIC = intrinsicIC; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + // don't bother if the concept graph is null + if (!validCG) + return 0d; + + // Compute the IC values for each concept + double ic1 = simSvc.getIC(concept1, this.intrinsicIC); + double ic2 = simSvc.getIC(concept2, this.intrinsicIC); + + // Get the LCS with the lowest IC score + double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo, + this.intrinsicIC); + + // if the corpus IC is 0 and the concept is not the root, then we don't + // have any IC on the concept and can't measure similarity - return 0 + if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1)) + return 0d; + + if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2)) + return 0d; + + // Compute the Dice score + if ( ic1 > 0 || ic2 > 0 ) { + double sim = (2.0 * lcsIC) / ( ic1 + ic2 ); + return sim; + } else { + return 0d; + } + } + + public DiceMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) { + super(simSvc); + this.intrinsicIC = intrinsicIC; + this.validCG = simSvc.getConceptGraph() != null; + if (!this.intrinsicIC && validCG) { + rootConcept = simSvc.getConceptGraph().getRoot(); + } + } + +} diff --git a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/FaithMetric.java b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/FaithMetric.java new file mode 100644 index 00000000..7c54eb04 --- /dev/null +++ b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/FaithMetric.java @@ -0,0 +1,96 @@ +package org.apache.ctakes.ytex.kernel.metric; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * compute Faith score to provide functionality as found in UMLS::Similarity + * + * UMLS::Similarity::faith.pm + * Module implementing the semantic relatedness measure described + * by Pirro and Euzenat (2010) + * + * Described in: + * https://inria.hal.science/hal-00793283/file/pirro2010b.pdf + * + * @author painter + * + */ +public class FaithMetric extends BaseSimilarityMetric { + + private static final Log log = LogFactory.getLog(LinMetric.class); + private boolean intrinsicIC = true; + private boolean validCG = false; + private String rootConcept = simSvc.getConceptGraph().getRoot(); + + public boolean isIntrinsicIC() { + return intrinsicIC; + } + + public void setIntrinsicIC(boolean intrinsicIC) { + this.intrinsicIC = intrinsicIC; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + // don't bother if the concept graph is null + if (!validCG) + return 0d; + + // Compute the IC values for each concept + double ic1 = simSvc.getIC(concept1, this.intrinsicIC); + double ic2 = simSvc.getIC(concept2, this.intrinsicIC); + + // Get the LCS with the lowest IC score + double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo, + this.intrinsicIC); + + // if the corpus IC is 0 and the concept is not the root, then we don't + // have any IC on the concept and can't measure similarity - return 0 + if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1)) + return 0d; + + if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2)) + return 0d; + + // Compute the faith score + if ( ic1 > 0 && ic2 > 0 ) { + double sim = (lcsIC) / ( ic1 + ic2 - lcsIC ); + return sim; + } else { + return 0d; + } + } + + public FaithMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) { + super(simSvc); + this.intrinsicIC = intrinsicIC; + this.validCG = simSvc.getConceptGraph() != null; + if (!this.intrinsicIC && validCG) { + rootConcept = simSvc.getConceptGraph().getRoot(); + } + } + +} diff --git a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/OchiaiMetric.java b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/OchiaiMetric.java new file mode 100644 index 00000000..1904d733 --- /dev/null +++ b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/OchiaiMetric.java @@ -0,0 +1,97 @@ +package org.apache.ctakes.ytex.kernel.metric; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Compute the Dice metric + * + * + * Described in: + * https://www.sciencedirect.com/science/article/pii/S1532046411000645 Tbl 3, eqn 9 + * + * @author painter + * + */ +public class OchiaiMetric extends BaseSimilarityMetric { + + private static final Log log = LogFactory.getLog(LinMetric.class); + private boolean intrinsicIC = true; + private boolean validCG = false; + private String rootConcept = simSvc.getConceptGraph().getRoot(); + + public boolean isIntrinsicIC() { + return intrinsicIC; + } + + public void setIntrinsicIC(boolean intrinsicIC) { + this.intrinsicIC = intrinsicIC; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + // don't bother if the concept graph is null + if (!validCG) + return 0d; + + // Compute the IC values for each concept + double ic1 = simSvc.getIC(concept1, this.intrinsicIC); + double ic2 = simSvc.getIC(concept2, this.intrinsicIC); + + // Get the LCS with the lowest IC score + double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo, + this.intrinsicIC); + + // if the corpus IC is 0 and the concept is not the root, then we don't + // have any IC on the concept and can't measure similarity - return 0 + if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1)) + return 0d; + + if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2)) + return 0d; + + // Compute the Ochiai score + // Both must be greater than zero + if ( ic1 > 0 && ic2 > 0 ) { + + // max of the individual IC's + double denom = Math.sqrt(ic1 * ic2 ); + double sim = (lcsIC) / ( denom ); + return sim; + } else { + return 0d; + } + } + + public OchiaiMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) { + super(simSvc); + this.intrinsicIC = intrinsicIC; + this.validCG = simSvc.getConceptGraph() != null; + if (!this.intrinsicIC && validCG) { + rootConcept = simSvc.getConceptGraph().getRoot(); + } + } + +} diff --git a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ResnikMetric.java b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ResnikMetric.java new file mode 100644 index 00000000..a3b4ba0b --- /dev/null +++ b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ResnikMetric.java @@ -0,0 +1,86 @@ +package org.apache.ctakes.ytex.kernel.metric; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * compute Resnik score to provide functionality as found in UMLS::Similarity + * + * UMLS::Similarity::res.pm + * Module implementing the semantic relatedness measure described + * by Resnik (1995) + * + * @author painter + * + */ +public class ResnikMetric extends BaseSimilarityMetric { + private static final Log log = LogFactory.getLog(LinMetric.class); + private boolean intrinsicIC = true; + private boolean validCG = false; + private String rootConcept = simSvc.getConceptGraph().getRoot(); + + public boolean isIntrinsicIC() { + return intrinsicIC; + } + + public void setIntrinsicIC(boolean intrinsicIC) { + this.intrinsicIC = intrinsicIC; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + // don't bother if the concept graph is null + if (!validCG) + return 0d; + + // get the minimum lcs of the two concepts + double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo, + this.intrinsicIC); + + // Test for zero + if (lcsIC == 0d) { + return 0d; + } + + // + // Resnik simply returns the minimum IC score of the LCSes + // + // Note: When comparing results to the Perl UMLS::Similarity metric from CPAN + // you would need to specify the "--intrinsic sanchez" method + // to find comparable IC as cTakes is only computing IC in this way + // + return lcsIC; + } + + public ResnikMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) { + super(simSvc); + this.intrinsicIC = intrinsicIC; + this.validCG = simSvc.getConceptGraph() != null; + if (!this.intrinsicIC && validCG) { + rootConcept = simSvc.getConceptGraph().getRoot(); + } + } + +} diff --git a/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/SimpsonMetric.java b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/SimpsonMetric.java new file mode 100644 index 00000000..4213b054 --- /dev/null +++ b/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/SimpsonMetric.java @@ -0,0 +1,96 @@ +package org.apache.ctakes.ytex.kernel.metric; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Compute the Dice metric + * + * + * Described in: + * https://www.sciencedirect.com/science/article/pii/S1532046411000645 Tbl 3, eqn 10 + * + * @author painter + * + */ +public class SimpsonMetric extends BaseSimilarityMetric { + + private static final Log log = LogFactory.getLog(LinMetric.class); + private boolean intrinsicIC = true; + private boolean validCG = false; + private String rootConcept = simSvc.getConceptGraph().getRoot(); + + public boolean isIntrinsicIC() { + return intrinsicIC; + } + + public void setIntrinsicIC(boolean intrinsicIC) { + this.intrinsicIC = intrinsicIC; + } + + @Override + public double similarity(String concept1, String concept2, + Map conceptFilter, SimilarityInfo simInfo) { + // don't bother if the concept graph is null + if (!validCG) + return 0d; + + // Compute the IC values for each concept + double ic1 = simSvc.getIC(concept1, this.intrinsicIC); + double ic2 = simSvc.getIC(concept2, this.intrinsicIC); + + // Get the LCS with the lowest IC score + double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo, + this.intrinsicIC); + + // if the corpus IC is 0 and the concept is not the root, then we don't + // have any IC on the concept and can't measure similarity - return 0 + if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1)) + return 0d; + + if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2)) + return 0d; + + // Compute the Simpson score + // Both must be greater than zero + if ( ic1 > 0 && ic2 > 0 ) { + double denom = ic1; + if ( ic2 < ic1 ) denom = ic2; + double sim = (lcsIC) / ( denom ); + return sim; + } else { + return 0d; + } + } + + public SimpsonMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) { + super(simSvc); + this.intrinsicIC = intrinsicIC; + this.validCG = simSvc.getConceptGraph() != null; + if (!this.intrinsicIC && validCG) { + rootConcept = simSvc.getConceptGraph().getRoot(); + } + } + +}