{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE Safe #-}

----------------------------------------------------------------------------
-- |
-- Module      :  Text.EditDistance
-- Copyright   :  (C) 2010-2015 Maximilian Bolingbroke
-- License     :  BSD-3-Clause (see the file LICENSE)
--
-- Maintainer  :  Oleg Grenrus <oleg.grenrus@iki.fi>
--
-- Computing the edit distances between strings
----------------------------------------------------------------------------
module Text.EditDistance (
        Costs(..), EditCosts(..), defaultEditCosts,
        levenshteinDistance, restrictedDamerauLevenshteinDistance
    ) where

import Text.EditDistance.EditCosts
import qualified Text.EditDistance.Bits as Bits
import qualified Text.EditDistance.SquareSTUArray as SquareSTUArray

-- | Find the Levenshtein edit distance between two strings.  That is to say, the number of deletion,
-- insertion and substitution operations that are required to make the two strings equal.  Note that
-- this algorithm therefore does not make use of the @transpositionCost@ field of the costs. See also:
-- <http://en.wikipedia.org/wiki/Levenshtein_distance>.
levenshteinDistance :: EditCosts -> String -> String -> Int
levenshteinDistance :: EditCosts -> String -> String -> Int
levenshteinDistance EditCosts
costs String
str1 String
str2
  | EditCosts -> Bool
isDefaultEditCosts EditCosts
costs
  , (Int
str1_len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
64) Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
== (Int
str2_len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
64)                             -- The Integer implementation of the Bits algorithm is quite inefficient, but scales better than the
  = Int -> Int -> String -> String -> Int
Bits.levenshteinDistanceWithLengths Int
str1_len Int
str2_len String
str1 String
str2  -- STUArrays if both string lengths > 64. The Word64 implementation is always better, if it is applicable
  | Bool
otherwise
  = EditCosts -> Int -> Int -> String -> String -> Int
SquareSTUArray.levenshteinDistanceWithLengths EditCosts
costs Int
str1_len Int
str2_len String
str1 String
str2 -- SquareSTUArray usually beat making more use of the heap with STUArray
  where
    str1_len :: Int
str1_len = String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
str1
    str2_len :: Int
str2_len = String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
str2

-- | Find the \"restricted\" Damerau-Levenshtein edit distance between two strings.  This algorithm calculates the cost of
-- the so-called optimal string alignment, which does not always equal the appropriate edit distance. The cost of the optimal
-- string alignment is the number of edit operations needed to make the input strings equal under the condition that no substring
-- is edited more than once.  See also: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
restrictedDamerauLevenshteinDistance :: EditCosts -> String -> String -> Int
restrictedDamerauLevenshteinDistance :: EditCosts -> String -> String -> Int
restrictedDamerauLevenshteinDistance EditCosts
costs String
str1 String
str2
  | EditCosts -> Bool
isDefaultEditCosts EditCosts
costs
  , (Int
str1_len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
64) Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
== (Int
str2_len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
64)                                             -- The Integer implementation of the Bits algorithm is quite inefficient, but scales better than the
  = Int -> Int -> String -> String -> Int
Bits.restrictedDamerauLevenshteinDistanceWithLengths Int
str1_len Int
str2_len String
str1 String
str2 -- STUArrays if both string lengths > 64. The Word64 implementation is always better, if it is applicable
  | Bool
otherwise
  = EditCosts -> Int -> Int -> String -> String -> Int
SquareSTUArray.restrictedDamerauLevenshteinDistanceWithLengths EditCosts
costs Int
str1_len Int
str2_len String
str1 String
str2 -- SquareSTUArray usually beat making more use of the heap with STUArray
  where
    str1_len :: Int
str1_len = String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
str1
    str2_len :: Int
str2_len = String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
str2