
    9e$                       d dl mZ d dlmZ d dlmZmZmZ d dlm	Z	 d dl
mZ ddlmZmZ ddlmZ g d	Z G d
 de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z e       Z e       Z e       Z e       Z e       Z  e       Z! e       Z" e       Z# e       Z$ e       Z%y)    )annotations)reduce)islicepermutationsrepeat)log)Sequence   )BaseBaseSimilarity)DamerauLevenshtein)JaccardSorensenTverskyOverlapCosineTanimoto
MongeElkanBagjaccardsorensentverskysorensen_diceoverlapcosinetanimotomonge_elkanbagc                  >    e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 ddZddZd	dZy)
r   aP  
    Compute the Jaccard similarity between the two sequences.
    They should contain hashable items.
    The return value is a float between 0 and 1, where 1 means equal,
    and 0 totally different.

    https://en.wikipedia.org/wiki/Jaccard_index
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/jaccard.js
    c                .    || _         || _        || _        y Nqvalas_setexternalselfr#   r$   r%   s       E/usr/lib/python3/dist-packages/textdistance/algorithms/token_based.py__init__zJaccard.__init__"        	     c                     yNr
    r'   	sequencess     r(   maximumzJaccard.maximum,       r+   c                     | j                   | }||S  | j                  | } | j                  | }| j                  |      } | j                  | }| j                  |      }||z  S r!   )quick_answer_get_counters_intersect_counters_count_counters_union_counters)r'   r0   resultintersectionunions        r(   __call__zJaccard.__call__/   s    """I.M&D&&	2	/t//;++L9$$$i0$$U+e##r+   Nr
   FTr#   intr$   boolr%   r@   returnNoner0   r	   rA   r?   r0   r	   rA   float__name__
__module____qualname____doc__r)   r1   r<   r.   r+   r(   r   r      sG     	!! ! 	!
 
!
$r+   r   c                  *    e Zd ZdZdddZddZd	dZy)
r   ae  
    Compute the Sorensen distance between the two sequences.
    They should contain hashable items.
    The return value is a float between 0 and 1, where 0 means equal,
    and 1 totally different.

    https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/dice.js
    c                .    || _         || _        || _        y r!   r"   r&   s       r(   r)   zSorensen.__init__G   s    	 r+   c                     yr-   r.   r/   s     r(   r1   zSorensen.maximumL   r2   r+   c                       j                   | }||S   j                  | }t         fd|D              }  j                  | } j	                  |      }d|z  |z  S )Nc              3  @   K   | ]  }j                  |        y wr!   r7   ).0sr'   s     r(   	<genexpr>z$Sorensen.__call__.<locals>.<genexpr>U   s     ?D((+?s   g       @)r4   r5   sumr6   r7   )r'   r0   r9   countr:   s   `    r(   r<   zSorensen.__call__O   st    """I.M&D&&	2	?Y??/t//;++L9\!E))r+   Nr=   r>   rC   rD   rF   r.   r+   r(   r   r   <   s    !
	*r+   r   c                  J    e Zd ZdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZddZd	dZy)
r   zTversky index

    https://en.wikipedia.org/wiki/Tversky_index
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/tversky.js
    Nc                d    || _         |xs t        d      | _        || _        || _        || _        y r-   )r#   r   ksbiasr$   r%   )r'   r#   rX   rY   r$   r%   s         r(   r)   zTversky.__init__b   s/     	/q		 r+   c                     yr-   r.   r/   s     r(   r1   zTversky.maximump   r2   r+   c                4    | j                   | }||S  | j                  | } | j                  | }| j                  |      }|D cg c]  }| j                  |       }}t	        t        | j                  t        |                  }t        |      dk7  s| j                  &|}t        ||      D ]  \  }}||||z
  z  z  } ||z  S |\  }}	|\  }
}t        ||	g      }t        ||	g      }|| j                  z   }|
|z  ||z
  z  ||z  z   }|||z   z  S c c}w )N   )r4   r5   r6   r7   listr   rX   lenrY   zipminmax)r'   r0   quick_resultr:   rR   rX   r9   ks1s2alphabetaa_valb_valc_vals                  r(   r<   zTversky.__call__s   sH   (t(()4#&D&&	2	/t//;++L96?@T))!,@	@&#i.12y>Q$))"3!FB	* 11!q</001&((BtRHRHtyy(/%$,>'' As   D)r
   NNFT)r#   r?   rX   zSequence[float]rY   zfloat | Noner$   r@   r%   r@   rA   rB   rC   rD   rF   r.   r+   r(   r   r   [   sa     "!!! ! 	!
 ! ! 
!(r+   r   c                  >    e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 ddZddZd	dZy)
r   zoverlap coefficient

    https://en.wikipedia.org/wiki/Overlap_coefficient
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/overlap.js
    c                .    || _         || _        || _        y r!   r"   r&   s       r(   r)   zOverlap.__init__   r*   r+   c                     yr-   r.   r/   s     r(   r1   zOverlap.maximum   r2   r+   c                     | j                   | }||S  | j                  | } | j                  | }| j                  |      }|D cg c]  }| j                  |       }}|t	        |      z  S c c}w r!   )r4   r5   r6   r7   r`   )r'   r0   r9   r:   rR   s        r(   r<   zOverlap.__call__   s    """I.M&D&&	2	/t//;++L96?@T))!,@	@c)n,, As   A/Nr=   r>   rC   rD   rF   r.   r+   r(   r   r      sG     	!! ! 	!
 
!
-r+   r   c                  >    e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 ddZddZd	dZy)
r   zcosine similarity (Ochiai coefficient)

    https://en.wikipedia.org/wiki/Cosine_similarity
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/cosine.js
    c                .    || _         || _        || _        y r!   r"   r&   s       r(   r)   zCosine.__init__   r*   r+   c                     yr-   r.   r/   s     r(   r1   zCosine.maximum   r2   r+   c                    | j                   | }||S  | j                  | } | j                  | }| j                  |      }|D cg c]  }| j                  |       }}t	        d |      }|t        |dt        |      z        z  S c c}w )Nc                    | |z  S r!   r.   )xys     r(   <lambda>z!Cosine.__call__.<locals>.<lambda>   s
    1q5 r+   g      ?)r4   r5   r6   r7   r   powr^   )r'   r0   r9   r:   rR   prods         r(   r<   zCosine.__call__   s    """I.M&D&&	2	/t//;++L96?@T))!,@	@()4c$c)n(<=== As   B	Nr=   r>   rC   rD   rF   r.   r+   r(   r   r      sG     	!! ! 	!
 
!>r+   r   c                  $     e Zd ZdZd fdZ xZS )r   zTanimoto distance
    This is identical to the Jaccard similarity coefficient
    and the Tversky index for alpha=1 and beta=1.
    c                T    t        |   | }|dk(  rt        d      S t        |d      S )Nr   -infr\   )superr<   rE   r   )r'   r0   r9   	__class__s      r(   r<   zTanimoto.__call__   s/    !9-Q;= vq>!r+   rD   )rG   rH   rI   rJ   r<   __classcell__)r}   s   @r(   r   r      s    
" "r+   r   c                  V    e Zd ZdZ e       Zedddf	 	 	 	 	 	 	 d
dZddZddZddZ	y	)r   a  
    https://www.academia.edu/200314/Generalized_Monge-Elkan_Method_for_Approximate_Text_String_Comparison
    http://www.cs.cmu.edu/~wcohen/postscript/kdd-2003-match-ws.pdf
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/monge-elkan.js
    Fr
   Tc                <    || _         || _        || _        || _        y r!   )	algorithm	symmetricr#   r%   )r'   r   r   r#   r%   s        r(   r)   zMongeElkan.__init__   s      #"	 r+   c                    | j                   j                  |      }|D ](  }|st        | | j                   j                  |       }* |S r!   )r   r1   ra   )r'   r0   r9   seqs       r(   r1   zMongeElkan.maximum   sO    ''	2 	CCV%;T^^%;%;S%AB	C r+   c           
         |syg }|D ]R  }|D ]K  }t        d      }|D ](  }t        || j                  j                  ||            }* |j	                  |       M T t        |      t        |      z  t        |      z  S )Nr   r{   )rE   ra   r   
similarityappendrT   r^   )r'   r   r0   maxesc1rR   max_simc2s           r(   _calczMongeElkan._calc   s     	&B &- NB!'4>>+D+DR+LMGNW%	&	& 5zCH$s5z11r+   c                
    | j                   | }||S  | j                  | }| j                  rGg }t        |      D ]   }|j	                   | j
                  |        " t        |      t        |      z  S  | j
                  | S r!   )r4   _get_sequencesr   r   r   r   rT   r^   )r'   r0   rb   r9   seqss        r(   r<   zMongeElkan.__call__  s    (t(()4#'D''3	>>F$Y/ 1jdjj$/01v;V,,4::y))r+   N)r   r@   r#   r?   r%   r@   rA   rB   rD   )
rG   rH   rI   rJ   r   _damerau_levenshteinr)   r1   r   r<   r.   r+   r(   r   r      s\    
 ./ '
! 
! 	
!
 
! 

!
2*r+   r   c                      e Zd ZdZddZy)r   z^Bag distance
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/bag.js
    c                l       j                   | }  j                  | t         fd|D              S )Nc              3  F   K   | ]  }j                  |z
          y wr!   rP   )rQ   sequencer:   r'   s     r(   rS   zBag.__call__.<locals>.<genexpr>  s!     [X4''<(?@[s   !)r5   r6   ra   )r'   r0   r:   s   ` @r(   r<   zBag.__call__  s9    &D&&	2	/t//;[QZ[[[r+   NrD   )rG   rH   rI   rJ   r<   r.   r+   r(   r   r     s    \r+   r   N)&
__future__r   	functoolsr   	itertoolsr   r   r   mathr   typingr	   baser   _Baser   _BaseSimilarity
edit_basedr   __all__r   r   r   r   r   r   r   r   r   r   dicer   r   r   r   r   r   r   r.   r+   r(   <module>r      s    "  2 2   C *"$o "$J* *>/(o /(d-o -B>_ >D"w "3* 3*l\% \ 
e	z
)l
):
:
)r+   