
    9ek                       d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	 ddl
mZmZ ddlmZmZ 	 d dlZg d	Z e	d
      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z e       Z  e       Z! e       xZ"Z# e       Z$ e       Z% e       Z& e       Z' e       Z( e       Z) e       Z*y# e$ r dZY w xY w)    )annotations)defaultdict)zip_longest)AnySequenceTypeVar   )BaseBaseSimilarity)SimFuncTestFuncN)HammingMLIPNSLevenshteinDamerauLevenshteinJaroJaroWinklerStrCmp95NeedlemanWunschGotohSmithWatermanhammingmlipnslevenshteindamerau_levenshteinjarojaro_winklerstrcmp95needleman_wunschgotohsmith_watermanTc                  <    e Zd ZdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZy)r   z
    Compute the Hamming distance between the two or more sequences.
    The Hamming distance is the number of differing items in ordered sequences.

    https://en.wikipedia.org/wiki/Hamming_distance
    Nc                X    || _         |xs | j                  | _        || _        || _        y N)qval_ident	test_functruncateexternal)selfr&   r(   r)   r*   s        D/usr/lib/python3/dist-packages/textdistance/algorithms/edit_based.py__init__zHamming.__init__*   s)     	"1dkk      c                       j                   | }  j                  | }|t        |t              sJ |S  j                  rt
        nt        }t         fd || D              S )Nc              3  >   K   | ]  } j                   |    y wr%   )r(   ).0esr+   s     r,   	<genexpr>z#Hamming.__call__.<locals>.<genexpr>?   s     Fr~t~~r**Fs   )_get_sequencesquick_answer
isinstanceintr)   zipr   sum)r+   	sequencesresult_zips   `   r,   __call__zHamming.__call__6   se    'D''3	"""I.fc***MmmsFT95EFFFr.   )r	   NFT)
r&   r7   r(   TestFunc | Noner)   boolr*   r?   returnNoner:   Sequence[object]r@   r7   )__name__
__module____qualname____doc__r-   r=    r.   r,   r   r   "   sP     %)
!
! #
! 	
!
 
! 

!	Gr.   r   c                  F    e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 ddZd	dZd	dZd	dZy)
r   a  
    Compute the absolute Levenshtein distance between the two sequences.
    The Levenshtein distance is the minimum number of edit operations necessary
    for transforming one sequence into the other. The edit operations allowed are:

        * deletion:     ABC -> BC, AC, AB
        * insertion:    ABC -> ABCD, EABC, AEBC..
        * substitution: ABC -> ABE, ADC, FBC..

    https://en.wikipedia.org/wiki/Levenshtein_distance
    TODO: https://gist.github.com/kylebgorman/1081951/9b38b7743a3cb5167ab2c6608ac8eea7fc629dca
    Nc                J    || _         |xs | j                  | _        || _        y r%   )r&   r'   r(   r*   )r+   r&   r(   r*   s       r,   r-   zLevenshtein.__init__P   s"     	"1dkk r.   c           	        |r|st        |      t        |      z   S | j                  |d   |d         r | |d d |d d       S t         | |d d |       | ||d d             } | |d d |d d       }t        ||      dz   S )Nr	   )lenr(   min)r+   s1s2dss        r,   
_recursivezLevenshtein._recursiveZ   s    r7SW$$>>"R&"R&)3BCR)) CR"RW

 CR"Sb'"1ay1}r.   c                   t        |      dz   }t        |      dz   }d}t        rt        j                  |      }nt        |      }t        d|      D ]o  }||gdg|dz
  z  z   }}t        d|      D ]O  }||   dz   }	||dz
     dz   }
| j	                  ||dz
     ||dz
           }||dz
     | z   }t        ||	|
      ||<   Q q |d   S )zp
        source:
        https://github.com/jamesturk/jellyfish/blob/master/jellyfish/_jellyfish.py#L18
        r	   Nr   rL   )rM   numpyarangeranger(   rN   )r+   rO   rP   rowscolsprevcurrcdeletion	insertiondistedits                r,   _cycledzLevenshtein._cycledk   s    
 2w{2w{,,t$C+Cq$ 	8AaSA3$(#33#D1d^ 87Q;AJN	~~bQiAE;AE{$h/T8Y7A8	8 2wr.   c                    | j                  ||      \  }}| j                  ||      }|t        |t              sJ |S | j	                  ||      S r%   )r4   r5   r6   r7   rb   r+   rO   rP   r;   s       r,   r=   zLevenshtein.__call__   sV    $$R,B""2r*fc***M||B##r.   )r	   NT)r&   r7   r(   r>   r*   r?   r@   rA   rO   Sequence[T]rP   rf   r@   r7   )rD   rE   rF   rG   r-   rS   rb   r=   rH   r.   r,   r   r   B   sL     %)	!! #! 	!
 
!"0$r.   r   c                  T    e Zd ZdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 d	dZd
dZd
dZd
dZd
dZy)r   a  
    Compute the absolute Damerau-Levenshtein distance between the two sequences.
    The Damerau-Levenshtein distance is the minimum number of edit operations necessary
    for transforming one sequence into the other. The edit operations allowed are:

        * deletion:      ABC -> BC, AC, AB
        * insertion:     ABC -> ABCD, EABC, AEBC..
        * substitution:  ABC -> ABE, ADC, FBC..
        * transposition: ABC -> ACB, BAC

    If `restricted=False`, it will calculate unrestricted distance,
    where the same character can be touched more than once.
    So the distance between BA and ACB is 2: BA -> AB -> ACB.

    https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
    Nc                X    || _         |xs | j                  | _        || _        || _        y r%   )r&   r'   r(   r*   
restricted)r+   r&   r(   r*   ri   s        r,   r-   zDamerauLevenshtein.__init__   s)     	"1dkk $r.   c           	        t        j                  t        |      dz   t        |      dz   gt              }t	        dt        |      dz         D ]  }|dz   ||   d<    t	        dt        |      dz         D ]  }|dz   |d   |<    t        |      D ]  \  }}t        |      D ]  \  }}t        | j                  ||             }t        ||dz
     |   dz   ||   |dz
     dz   ||dz
     |dz
     |z         ||   |<   |r|sa| j                  |||dz
           szt        ||   |   ||dz
     |dz
     |z         ||   |<     |t        |      dz
     t        |      dz
     S )Nr	   dtyperL      )rU   zerosrM   r7   rW   	enumerater(   rN   	r+   rO   rP   rQ   ijcs1cs2costs	            r,   _numpyzDamerauLevenshtein._numpy   s   KKR1c"gk2#> r3r7Q;' 	A1uAaDH	r3r7Q;' 	A1uAbE!H	  m 	FAs#B- 3t~~c3778 a!eHQK!OaDQK!Oa!eHQUOd*!Q ~~c2a!e95aDGa!eHQUOd*!Q	* R1~c"gk**r.   c                ^   i }i }t        |      }t        |      }||z   }||d<   t        t        |      dz         D ]  }|||df<   |||df<    t        t        |      dz         D ]  }	||d|	f<   |	|d|	f<    t        |d      D ]  \  }}
d}t        |d      D ]  \  }	}|j                  |d      }|}| j	                  |
|      rd}|	}nd}t        ||dz
  |	dz
  f   |z   |||	dz
  f   dz   ||dz
  |	f   dz   ||dz
  |dz
  f   ||z
  z   dz
  |	|z
  z         |||	f<    |||
<    |||f   S )zKhttps://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
        )rL   rL   r	   rL   r   start)rM   rW   ro   getr(   rN   )r+   rO   rP   rQ   dalen1len2maxdistrq   rr   rs   dbrt   i1j1ru   s                   r,   _pure_python_unrestrictedz,DamerauLevenshtein._pure_python_unrestricted   s    )+2w2w+&	 s2w{# 	AAaeHAadG	 s2w{# 	AAb!eHAadG	  !, 	FAsB#Ba0 3VVC^>>#s+DBDa!eQUlOd*aQhK!Oa!eQhK!Ob1fb1fn%R014B?	!Q$ BsG#	& t}r.   c           
        i }t        dt        |      dz         D ]  }|dz   ||df<    t        dt        |      dz         D ]  }|dz   |d|f<    t        |      D ]  \  }}t        |      D ]  \  }}t        | j	                  ||             }t        ||dz
  |f   dz   |||dz
  f   dz   ||dz
  |dz
  f   |z         |||f<   |r|s]| j	                  |||dz
           sv| j	                  ||dz
     |      st        |||f   ||dz
  |dz
  f   |z         |||f<     |t        |      dz
  t        |      dz
  f   S )za
        https://www.guyrutenberg.com/2008/12/15/damerau-levenshtein-distance-in-python/
        rL   r	   rm   )rW   rM   ro   r7   r(   rN   rp   s	            r,   _pure_python_restrictedz*DamerauLevenshtein._pure_python_restricted   s    )+ r3r7Q;' 	A1uAaeH	r3r7Q;' 	A1uAb!eH	  m 	FAs#B- 3t~~c3778 a!eQhK!OaQhK!Oa!eQUlOd*!Q$ ~~c2a!e95~~bQi5adGa!eQUlOd*!Q$#	. R1c"gk)**r.   c                    | j                  ||      \  }}| j                  ||      }||S | j                  r| j                  ||      S | j	                  ||      S r%   )r4   r5   ri   r   r   rd   s       r,   r=   zDamerauLevenshtein.__call__  sd    $$R,B""2r*M
 ??//B77--b"55r.   )r	   NTT)
r&   r7   r(   r>   r*   r?   ri   r?   r@   rA   re   )	rD   rE   rF   rG   r-   rv   r   r   r=   rH   r.   r,   r   r      sa    & %)
%
% #
% 	
%
 
% 

%+B'R#+J6r.   r   c                  F    e Zd ZdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZd	d
dZy)r   a  
    Computes the Jaro-Winkler measure between two strings.
    The Jaro-Winkler measure is designed to capture cases where two strings
    have a low Jaro score, but share a prefix.
    and thus are likely to match.

    https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/jaro.js
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/jaro-winkler.js
    c                <    || _         || _        || _        || _        y r%   )r&   long_tolerance
winklerizer*   )r+   r   r   r&   r*   s        r,   r-   zJaroWinkler.__init__6  s!     	,$ r.   c                     yNr	   rH   r+   r:   s     r,   maximumzJaroWinkler.maximumB      r.   c                   | j                  ||      \  }}| j                  ||      }||S t        |      }t        |      }|r|syt        ||      }t	        ||      }|dz  dz
  }|dk  rd}dg|z  }	dg|z  }
d}t        |      D ]Y  \  }}t	        d||z
        }t        ||z   |dz
        }t        ||dz         D ]!  }|
|   r	||   |k(  sdx|	|<   |
|<   |dz  } Y [ |sydx}}t        |	      D ]6  \  }}|s	t        ||      D ]  }|
|   s	|dz   } n ||   |   k7  s2|dz  }8 |dz  }||z  ||z  z   }|||z
  |z  z  }|dz  }| j                  s|S |dk  r|S t        |d	      }d}||k  r!||   ||   k(  r|dz  }||k  r||   ||   k(  r|r|||z  d
|z
  z  z  }| j                  r|d	k  r|S ||dz   k  sd|z  ||z   k  r|S ||z
  dz
  ||z   |dz  z
  dz   z  }|d
|z
  |z  z  }|S )N        rm   r	   r   FT   ffffff?         ?)	r4   r5   rM   rN   maxro   rW   r   r   )r+   rO   rP   prefix_weightr;   s1_lens2_lenmin_lensearch_ranges1_flagss2_flagscommon_charsrq   s1_chlowhirr   ktrans_counts1_fweighttmps                         r,   r=   zJaroWinkler.__call__E  s   $$R,B""2r*MRRVff%66*$)Q.!L7V#7V# !" 	HAua\)*CQ%vz2B3Q' {r!u~044HQK(1+ A%L		  K * 	%GAtq&) A{E a5BqE>1$K	% 	 &)>><+-==! MS=M O!e1AFA !e1Aa-'3<88F
 ""glM1q5 A$4w{$BMa!#!a%(?!(CD3<3&&r.   N)FTr	   T)
r   r?   r   r?   r&   r7   r*   r?   r@   rA   rB   )皙?)rO   rf   rP   rf   r   floatr@   r   rD   rE   rF   rG   r-   r   r=   rH   r.   r,   r   r   *  sU    	  %
!
! 
! 	
!
 
! 

!Nr.   r   c                  6     e Zd Z	 	 	 d	 	 	 	 	 	 	 d fdZ xZS )r   c                ,    t         |   |d||       y )NF)r   r   r&   r*   )superr-   )r+   r   r&   r*   	__class__s       r,   r-   zJaro.__init__  s#     	)	 	 	
r.   )Fr	   T)r   r?   r&   r7   r*   r?   r@   rA   )rD   rE   rF   r-   __classcell__)r   s   @r,   r   r     s>      %	

 
 	

 

 
r.   r   c                  d    e Zd ZdZ	 	 	 	 d
	 	 	 	 	 	 	 	 	 ddZddZddZddZddZddZ	dd	Z
y)r   a0  
    Computes the Needleman-Wunsch measure between two strings.
    The Needleman-Wunsch generalizes the Levenshtein distance and considers global
    alignment between two strings. Specifically, it is computed by assigning
    a score to each alignment between two input strings and choosing the
    score of the best alignment, that is, the maximal score.
    An alignment between two strings is a set of correspondences between the
    characters of between them, allowing for gaps.

    https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
    Nc                r    || _         || _        |r|| _        || _        y | j                  | _        || _        y r%   )r&   gap_costsim_funcr'   r*   r+   r   r   r&   r*   s        r,   r-   zNeedlemanWunsch.__init__  s9     	 $DM ! !KKDM r.   c                P    t        t        t        |             | j                  z  S r%   )r   maprM   r   r   s     r,   minimumzNeedlemanWunsch.minimum  s     CY'((4==88r.   c                4    t        t        t        |            S r%   )r   r   rM   r   s     r,   r   zNeedlemanWunsch.maximum      3sI&''r.   c                &    d | j                   | z  S )z'Get distance between sequences
        rL   )
similarityr   s     r,   distancezNeedlemanWunsch.distance  s     ODOOY///r.   c                z     | j                   | } | j                  | }|dk(  ry | j                  | |z
  ||z
  z  S )z!Get distance from 0 to 1
        r   )r   r   r   r+   r:   r   r   s       r,   normalized_distancez#NeedlemanWunsch.normalized_distance  sN     $,,	*$,,	*a<y)G3'8IJJr.   c                z     | j                   | } | j                  | }|dk(  ry | j                  | |z
  |dz  z  S )z#Get similarity from 0 to 1
        r   r	   rm   )r   r   r   r   s       r,   normalized_similarityz%NeedlemanWunsch.normalized_similarity  sM     $,,	*$,,	*a<+g5'A+FFr.   c                   t         st        d      | j                  ||      \  }}t        j                  t	        |      dz   t	        |      dz   ft
              }t        t	        |      dz         D ]  }|| j                  z   ||df<    t        t	        |      dz         D ]  }|| j                  z   |d|f<    t        |d      D ]y  \  }}t        |d      D ]e  \  }}||dz
  |dz
  f   | j                  ||      z   }||dz
  |f   | j                  z
  }	|||dz
  f   | j                  z
  }
t        ||	|
      |||f<   g { ||j                  d   dz
  |j                  d   dz
  f   S )Nz2Please, install numpy for Needleman-Wunsch measurer	   rk   r   )rU   ImportErrorr4   rn   rM   r   rW   r   ro   r   r   shape)r+   rO   rP   dist_matrq   rr   c1c2matchdeleteinserts              r,   r=   zNeedlemanWunsch.__call__  s   RSS$$R,B ;;Wq[#b'A+&

 s2w{# 	2A 4==01HQTN	2 s2w{# 	2A 4==01HQTN	2 r1% 	<EAr"2q) <2 QA.r21FF!!a%(+dmm;!!QU(+dmm;!$UFF!;A	<	< q)A-x~~a/@1/DDEEr.   r   Nr	   T
r   r   r   r   r&   r7   r*   r?   r@   rA   r:   rC   r@   r   rO   rf   rP   rf   r@   r   )rD   rE   rF   rG   r-   r   r   r   r   r   r=   rH   r.   r,   r   r     sk    
  !! ! 	!
 ! 
!9(0
KGFr.   r   c                  D    e Zd ZdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZd	dZy)
r   a  
    Computes the Smith-Waterman measure between two strings.
    The Smith-Waterman algorithm performs local sequence alignment;
    that is, for determining similar regions between two strings.
    Instead of looking at the total sequence, the Smith-Waterman algorithm compares
    segments of all possible lengths and optimizes the similarity measure.

    https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/smith-waterman.js
    Nc                X    || _         || _        |xs | j                  | _        || _        y r%   )r&   r   r'   r   r*   r   s        r,   r-   zSmithWaterman.__init__  s)     	  /DKK r.   c                4    t        t        t        |            S r%   rN   r   rM   r   s     r,   r   zSmithWaterman.maximum  r   r.   c                H   t         st        d      | j                  ||      \  }}| j                  ||      }||S t        j                  t        |      dz   t        |      dz   ft              }t        |d      D ]{  \  }}t        |d      D ]f  \  }}||dz
  |dz
  f   | j                  ||      z   }	||dz
  |f   | j                  z
  }
|||dz
  f   | j                  z
  }t        d|	|
|      |||f<   h } ||j                  d   dz
  |j                  d   dz
  f   S )Nz0Please, install numpy for Smith-Waterman measurer	   rk   rx   r   )rU   r   r4   r5   rn   rM   r   ro   r   r   r   r   )r+   rO   rP   r;   r   rq   sc1rr   sc2r   r   r   s               r,   r=   zSmithWaterman.__call__  sI   PQQ$$R,B""2r*M;;Wq[#b'A+&
  !, 		?FAs#Ba0 ?3 !QA.sC1HH "!a%(+dmm;!!QU(+dmm;!$Qvv!>A?		? q)A-x~~a/@1/DDEEr.   r   r   rB   r   r   rH   r.   r,   r   r     sU    	  
!
! 
! 	
!
 
! 

!(Fr.   r   c                  R    e Zd ZdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZd	dZd	dZd
dZy)r   zGotoh score
    Gotoh's algorithm is essentially Needleman-Wunsch with affine gap
    penalties:
    https://www.cs.umd.edu/class/spring2003/cmsc838t/papers/gotoh1982.pdf
    Nc                    || _         || _        || _        |r|| _        || _        y | j                  | _        || _        y r%   )r&   gap_opengap_extr   r'   r*   )r+   r   r   r   r&   r*   s         r,   r-   zGotoh.__init__9  s@     	 $DM ! !KKDM r.   c                6    t        t        t        |             S r%   r   r   s     r,   r   zGotoh.minimumJ  s    CY'(((r.   c                4    t        t        t        |            S r%   r   r   s     r,   r   zGotoh.maximumM  r   r.   c           
     z   t         st        d      | j                  ||      \  }}t        |      }t        |      }t        j                  |dz   |dz   ft
              }t        j                  |dz   |dz   ft
              }t        j                  |dz   |dz   ft
              }d|d<   t        d      |d<   t        d      |d<   t        d|dz         D ]Y  }t        d      ||df<   | j                   | j                  |dz
  z  z
  ||df<   t        d      ||df<   | j                   ||df<   [ t        d|dz         D ]Y  }	t        d      |d|	f<   t        d      |d|	f<   | j                   |d|	f<   | j                   | j                  |	dz
  z  z
  |d|	f<   [ t        |d      D ]  \  }}
t        |d      D ]  \  }	}| j                  |
|      }t        ||dz
  |	dz
  f   |z   ||dz
  |	dz
  f   |z   ||dz
  |	dz
  f   |z         |||	f<   t        ||dz
  |	f   | j                  z
  ||dz
  |	f   | j                  z
        |||	f<   t        |||	dz
  f   | j                  z
  |||	dz
  f   | j                  z
        |||	f<     d |j                  D        \  }}	t        |||	f   |||	f   |||	f         S )	Nz'Please, install numpy for Gotoh measurer	   rk   r   )r   r   z-infrx   c              3  &   K   | ]	  }|d z
    yw)r	   NrH   )r1   ns     r,   r3   z!Gotoh.__call__.<locals>.<genexpr>  s     +!A+s   )rU   r   r4   rM   rn   r   rW   r   r   ro   r   r   r   )r+   rO   rP   len_s1len_s2d_matp_matq_matrq   rr   r   r   sim_vals                r,   r=   zGotoh.__call__P  s   GHH$$R,B RRVaZ!4EBVaZ!4EBVaZ!4EBdFmdFmdq&1*% 	)A-E!Q$K==.4<<1q5+AAE!Q$K-E!Q$K==.E!Q$K		)
 q&1*% 	BA-E!Q$K-E!Q$K==.E!Q$K==.4<<1q5+AAE!Q$K		B  !, 	FAs#Ba0 3--S1!!a%Q,''1!a%Q,''1!a%Q,''1ad
 "!a%(Odmm3!a%(Odll2ad "!QU(Odmm3!QU(Odll2ad	" ,u{{+15A;adU1a4[99r.   )r	   g?Nr	   T)r   r7   r   r   r   r   r&   r7   r*   r?   r@   rA   rB   r   )rD   rE   rF   rG   r-   r   r   r=   rH   r.   r,   r   r   2  sf      !! ! 	!
 ! ! 
!")(0:r.   r   c                  L    e Zd ZU dZdZded<   d
ddZddZedd       Z	ddZ
y	)r   z`strcmp95 similarity

    http://cpansearch.perl.org/src/SCW/Text-JaroWinkler-0.1/strcmp95.c
    )$)AE)r   I)r   O)r   U)BV)r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   Y)r   r   )CG)r   F)Wr   )r   r   )XK)SZ)r   r   )Qr   )r   r   )MN)Lr   )r   r   )PR)r   J)2r   )5r   )8r   )1r   )r   r   )0r   )r   r   )r   r   )r   r   ztuple[tuple[str, str], ...]sp_mxc                     || _         || _        y r%   )long_stringsr*   )r+   r  r*   s      r,   r-   zStrCmp95.__init__  s    ( r.   c                     yr   rH   r   s     r,   r   zStrCmp95.maximum  r   r.   c                4    dt        |       cxk  xr dk  S c S )Nr   [   )ord)chars    r,   	_in_rangezStrCmp95._in_range  s    3t9!r!!!!r.   c                   |j                         j                         }|j                         j                         }| j                  ||      }||S t        |      }t        |      }t	        t
              }| j                  D ]  \  }}d|||f<   d|||f<    ||kD  r|}	|}
n|}	|}
dg|	z  }dg|	z  }t        d|	dz  dz
        }	d}|dz
  }t        |      D ]Y  \  }}t        ||	z
  d      }t        ||	z   |      }t        ||dz         D ]$  }||   dk(  s||   |k(  sd||<   d||<   |dz  } Y [ |dk(  rydx}}t        |      D ]9  \  }}||   st        ||      D ]  }||   dk7  s|dz   } n ||   k7  s5|dz  }; |dz  }d}|
|kD  rt        |      D ]q  }||   dk7  r| j                  ||         s!t        |      D ]C  }||   dk7  r| j                  ||         s!||   ||   f|vr.||||   ||   f   z  }d||<    q s |dz  |z   }||z  ||z  z   }|||z
  |z  z  }|dz  }|dk  r|S t        |
d	      }d}t        ||      D ]*  \  }}||k\  r n ||k7  r n|j                         r n|dz  }, |r||d
z  d|z
  z  z  }| j                  s|S |
d	k  r|S ||dz   k  sd|z  |
|z   k  r|S |d   j                         r|S ||z
  dz
  ||z   |dz  z
  dz   z  }|d|z
  |z  z  }|S )Nr   r   rm   r	   r   g      $@g      @r   r   r   r   )stripupperr5   rM   r   r7   r   r   ro   rN   rW   r  r8   isdigitr  )r+   rO   rP   r;   r   r   adjwtr   r   r   minvs1_flags2_flagnum_comyl1rq   r   lowlimhilimrr   r   n_transn_siminum_simr   r   ress                              r,   r=   zStrCmp95.__call__  s   XXZXXZ""2r*MRRC  jj 	FBE"b&ME"b&M	 F?!LD!LD #$#$1la/!34 qjm 	FAs\)1-FL(#.E6519- 1:?r!u|!"GAJ!"GAJqLG	 a< Gm 	FAs1:1f% 1:?AA be|1	 Q, '>6] 1:?~~be,v 	AqzQ >>"Q%0 1r!u~U2 eBqE2a5L11F!"GAJ	 4-') 6!Gf$447W$//# S=M aLB 	HCAvcz{{}FA	 a#gv..F   M19Ma!eq7{TAX5Ma5==?M{Q6F?QU#:Q#>?3<3&&r.   N)FT)r  r?   r*   r?   r@   rA   rB   )r@   r?   )rO   strrP   r  r@   r   )rD   rE   rF   rG   r   __annotations__r-   r   staticmethodr  r=   rH   r.   r,   r   r     s:    *E& ! " "xr.   r   c                  D    e Zd ZdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZd	dZy)
r   a!  
    Compute the Hamming distance between the two or more sequences.
    The Hamming distance is the number of differing items in ordered sequences.

    http://www.sial.iias.spb.su/files/386-386-1-PB.pdf
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/mlipns.js
    c                <    || _         || _        || _        || _        y r%   )r&   	thresholdmaxmismatchesr*   )r+   r  r  r&   r*   s        r,   r-   zMLIPNS.__init__   s!     	"* r.   c                     yr   rH   r   s     r,   r   zMLIPNS.maximum+  r   r.   c                ^    | j                   | } | j                  | }||S d} t               | }t        t	        t
        |            }t        |      rU|| j                  k  rF|syd||z
  |z  z
  | j                  k  ry|dz  }|dz  }|dz  }t        |      r|| j                  k  rF|syy)Nr   r	   )	r4   r5   r   r   r   rM   allr  r  )r+   r:   r;   
mismatcheshammaxlens         r,   r=   zMLIPNS.__call__.  s    'D''3	"""I.M
gi#Si())nt/A/A!AFSLF**dnn<!OJ1HCaKF )nt/A/A!A r.   N)g      ?rm   r	   T)
r  r   r  r7   r&   r7   r*   r?   r@   rA   rB   r   r   rH   r.   r,   r   r     sR     "&		!	!	! 	! 		!
 
	!r.   r   )+
__future__r   collectionsr   	itertoolsr   typingr   r   r   baser
   _Baser   _BaseSimilaritytypesr   r   rU   r   __all__r"   r   r   r   r   r   r   r   r   r   r   r   r   dameraur   r   r   r   r!   r    r   r   rH   r.   r,   <module>r0     s4   " $ ! ) ) C $

 CLGe G@I$% I$XY6 Y6xi/ iX
; 
TFo TFn3FO 3FlN:O N:bQ Qh,_ ,^ )m 2 4 4
v}"$ :	  Es   C5 5C?>C?