/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.misc;

import org.apache.lucene.search.similarities.ClassicSimilarity;

A similarity with a lengthNorm that provides for a "plateau" of equally good lengths, and tf helper functions.

For lengthNorm, a min/max can be specified to define the plateau of lengths that should all have a norm of 1.0. Below the min and above the max, the lengthNorm drops off as an inverse square-root function.

For tf, baselineTf and hyperbolicTf functions are provided, which subclasses can choose between.

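See Also: doc-files/ss.gnuplot (a Gnuplot file used to generate some of the visualizations referenced from each function)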
/** * <p> * A similarity with a lengthNorm that provides for a "plateau" of * equally good lengths, and tf helper functions. * </p> * <p> * For lengthNorm, A min/max can be specified to define the * plateau of lengths that should all have a norm of 1.0. * Below the min, and above the max the lengthNorm drops off in a * sqrt function. * </p> * <p> * For tf, baselineTf and hyperbolicTf functions are provided, which * subclasses can choose between. * </p> * * @see <a href="doc-files/ss.gnuplot">A Gnuplot file used to generate some of the visualizations referenced from each function.</a> */
public class SweetSpotSimilarity extends ClassicSimilarity { private int ln_min = 1; private int ln_max = 1; private float ln_steep = 0.5f; private float tf_base = 0.0f; private float tf_min = 0.0f; private float tf_hyper_min = 0.0f; private float tf_hyper_max = 2.0f; private double tf_hyper_base = 1.3d; private float tf_hyper_xoffset = 10.0f; public SweetSpotSimilarity() { super(); }
Sets the baseline and minimum function variables for baselineTf
See Also:
  • baselineTf
/** * Sets the baseline and minimum function variables for baselineTf * * @see #baselineTf */
public void setBaselineTfFactors(float base, float min) { tf_min = min; tf_base = base; }
Sets the function variables for the hyperbolicTf functions
Params:
  • min – the minimum tf value to ever be returned (default: 0.0)
  • max – the maximum tf value to ever be returned (default: 2.0)
  • base – the base value to be used in the exponential for the hyperbolic function (default: 1.3)
  • xoffset – the midpoint of the hyperbolic function (default: 10.0)
See Also:
/** * Sets the function variables for the hyperbolicTf functions * * @param min the minimum tf value to ever be returned (default: 0.0) * @param max the maximum tf value to ever be returned (default: 2.0) * @param base the base value to be used in the exponential for the hyperbolic function (default: 1.3) * @param xoffset the midpoint of the hyperbolic function (default: 10.0) * @see #hyperbolicTf */
public void setHyperbolicTfFactors(float min, float max, double base, float xoffset) { tf_hyper_min = min; tf_hyper_max = max; tf_hyper_base = base; tf_hyper_xoffset = xoffset; }
Sets the default function variables used by lengthNorm when no field specific variables have been set.
See Also:
  • lengthNorm
/** * Sets the default function variables used by lengthNorm when no field * specific variables have been set. * * @see #lengthNorm */
public void setLengthNormFactors(int min, int max, float steepness, boolean discountOverlaps) { this.ln_min = min; this.ln_max = max; this.ln_steep = steepness; this.discountOverlaps = discountOverlaps; }
Implemented as: 1/sqrt( steepness * (abs(x-min) + abs(x-max) - (max-min)) + 1 ) .

This degrades to 1/sqrt(x) when min and max are both 1 and steepness is 0.5

:TODO: potential optimization is to just flat out return 1.0f if numTerms is between min and max.

See Also:
/** * Implemented as: * <code> * 1/sqrt( steepness * (abs(x-min) + abs(x-max) - (max-min)) + 1 ) * </code>. * * <p> * This degrades to <code>1/sqrt(x)</code> when min and max are both 1 and * steepness is 0.5 * </p> * * <p> * :TODO: potential optimization is to just flat out return 1.0f if numTerms * is between min and max. * </p> * * @see #setLengthNormFactors * @see <a href="doc-files/ss.computeLengthNorm.svg">An SVG visualization of this function</a> */
@Override public float lengthNorm(int numTerms) { final int l = ln_min; final int h = ln_max; final float s = ln_steep; return (float) (1.0f / Math.sqrt ( ( s * (float)(Math.abs(numTerms - l) + Math.abs(numTerms - h) - (h-l)) ) + 1.0f ) ); }
Delegates to baselineTf
See Also:
  • baselineTf
/** * Delegates to baselineTf * * @see #baselineTf */
@Override public float tf(float freq) { return baselineTf(freq); }
Implemented as: (x <= min) ? base : sqrt(x+(base**2)-min) ...but with a special case check for 0.

This degrates to sqrt(x) when min and base are both 0

See Also:
/** * Implemented as: * <code> * (x &lt;= min) &#63; base : sqrt(x+(base**2)-min) * </code> * ...but with a special case check for 0. * <p> * This degrates to <code>sqrt(x)</code> when min and base are both 0 * </p> * * @see #setBaselineTfFactors * @see <a href="doc-files/ss.baselineTf.svg">An SVG visualization of this function</a> */
public float baselineTf(float freq) { if (0.0f == freq) return 0.0f; return (freq <= tf_min) ? tf_base : (float)Math.sqrt(freq + (tf_base * tf_base) - tf_min); }
Uses a hyperbolic tangent function that allows for a hard max... tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1)

This code is provided as a convenience for subclasses that want to use a hyperbolic tf function.

See Also:
/** * Uses a hyperbolic tangent function that allows for a hard max... * * <code> * tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1) * </code> * * <p> * This code is provided as a convenience for subclasses that want * to use a hyperbolic tf function. * </p> * * @see #setHyperbolicTfFactors * @see <a href="doc-files/ss.hyperbolicTf.svg">An SVG visualization of this function</a> */
public float hyperbolicTf(float freq) { if (0.0f == freq) return 0.0f; final float min = tf_hyper_min; final float max = tf_hyper_max; final double base = tf_hyper_base; final float xoffset = tf_hyper_xoffset; final double x = (double)(freq - xoffset); final float result = min + (float)( (max-min) / 2.0f * ( ( ( Math.pow(base,x) - Math.pow(base,-x) ) / ( Math.pow(base,x) + Math.pow(base,-x) ) ) + 1.0d ) ); return Float.isNaN(result) ? max : result; } public String toString() { StringBuilder sb = new StringBuilder(); sb.append("SweetSpotSimilarity") .append('(').append("ln_min=").append(ln_min).append(", ") .append("ln_max=").append(ln_max).append(", ") .append("ln_steep=").append(ln_steep).append(", ") .append("tf_base=").append(tf_base).append(", ") .append("tf_min=").append(tf_min).append(", ") .append("tf_hyper_min=").append(tf_hyper_min).append(", ") .append("tf_hyper_max=").append(tf_hyper_max).append(", ") .append("tf_hyper_base=").append(tf_hyper_base).append(", ") .append("tf_hyper_xoffset=").append(tf_hyper_xoffset) .append(")"); return sb.toString(); } }