
    9e                        d dl mZ d dlmZ d dlmZ ddlmZ ddl	m
Z ddlmZ 	 d d	lZg dZ G d de      Z G d de      Z G d de      Z e       Z e       Z e       Zy	# e$ r d d
lmZ d	ZY Jw xY w)    )annotations)SequenceMatcher)Any   )find_ngrams   )BaseSimilarity)TestFuncN)array)lcsseqlcsstrratcliff_obershelpLCSSeqLCSStrRatcliffObershelpc                  N    e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 d	dZd
dZddZddZddZy)r   zplongest common subsequence similarity

    https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
    Nc                J    || _         |xs | j                  | _        || _        y N)qval_ident	test_funcexternal)selfr   r   r   s       H/usr/lib/python3/dist-packages/textdistance/algorithms/sequence_based.py__init__zLCSSeq.__init__"   s"     	"1dkk     c           
        t         r6t        j                  t        |      dz   t        |      dz   ft              }n>t	        t        |      dz         D cg c]  }t        ddgt        |      dz   z          }}t        |      D ]_  \  }}t        |      D ]L  \  }}||k(  r||   |   dz   ||dz      |dz   <   #t        ||dz      |   ||   |dz            ||dz      |dz   <   N a d}	t        |      t        |      }}|dk7  rl|dk7  rg||   |   ||dz
     |   k(  r|dz  }nB||   |   ||   |dz
     k(  r|dz  }n(||dz
     ||dz
     k(  sJ ||dz
     |	z   }	|dz  }|dz  }|dk7  r|dk7  rg|	S c c}w )z
        https://github.com/chrislit/abydos/blob/master/abydos/distance/_lcsseq.py
        http://www.dis.uniroma1.it/~bonifaci/algo/LCSSEQ.py
        http://rosettacode.org/wiki/Longest_common_subsequence#Dynamic_Programming_8
        r   )dtypeLr    )numpyzeroslenintranger   	enumeratemax)
r   seq1seq2lengths_ichar1jchar2results
             r   _dynamiczLCSSeq._dynamic,   s    kk3t9q=#d)a-"@LGBGD	TUBVWQuS1#TQ"78WGW "$ 	VHAu%dO V5E>,3AJqMA,=GAEN1q5),/Aq0A71:aRSeCT,UGAEN1q5)	V	V 4y#d)11faqz!}Aq 11QA'!*QU"33QAE{d1q5k111a!ev-QQ 1fa / Xs   #E3c                r   t        |      s t        |d                S  | j                  |D cg c]  }|d   	 c} r"|d   d   }t        d |D              } | | |z   S  t        |d                }t	        |      D ]0  \  }}|d | |d d fz   ||dz   d  z   }t         | | |gt              }2 |S c c}w )Nr   c              3  &   K   | ]	  }|d d   y w)Nr3    ).0ss     r   	<genexpr>z$LCSSeq._recursive.<locals>.<genexpr>T   s     8af8s   r   key)alltyper   tupler&   r'   r#   )r   	sequencesr7   cmr,   sss          r   
_recursivezLCSSeq._recursiveO   s    9~%4	!%''4>>95aAbE56!R A8i88I#a''D1 i( 	-DAq2A!CR&+iA.??BT2YN,A	-  6s   B4c                ~    |sy | j                   | }t        |      dk(  r | j                  | S  | j                  | S )Nr    r   )_get_sequencesr#   r1   rB   r   r>   s     r   __call__zLCSSeq.__call__\   sH    'D''3	y>Q 4==),,"4??I..r   c                    t         | |       S r   r#   rE   s     r   
similarityzLCSSeq.similaritye       4#$$r   )r   NT)r   r$   r   r
   r   boolreturnNone)r(   strr)   rN   rL   rN   r>   rN   rL   rN   )rL   r$   )	__name__
__module____qualname____doc__r   r1   rB   rF   rI   r5   r   r   r   r      sR     "	!! ! 	!
 
!!F/%r   r   c                  0    e Zd ZdZddZddZddZd	dZy)
r   z(longest common substring similarity
    c                    t        ||      }|j                  dt        |      dt        |            }||j                  |j                  |j                  z    S )N)abr   )_SequenceMatcherfind_longest_matchr#   rV   size)r   s1s2matchermatchs        r   	_standartzLCSStr._standartm   sJ    "R2.**1c"gq#b'B%''577UZZ/00r   c                    t        |t              }t        |      }t        |dd      D ]6  }t        ||      D ]%  }dj	                  |      }|D ]  }||vs   |c c S  8  t        |             S )Nr9   r   r3   r    )minr#   r%   r   joinr<   )r   r>   shortlengthnsubseqjoinedseqs           r   _customzLCSStr._customr   s    I3'Uvq"% 	"A%eQ/ "$ "CS(" "M"	" tE{}r   c                    t        |      syt        |      }|dk(  ry|dk(  r|d   S  | j                  | }|dk(  r+t        t	        t        |            dk  r | j
                  | S  | j                  | S )Nr    r   r   r      )r;   r#   rD   r'   mapr_   ri   )r   r>   rd   s      r   rF   zLCSStr.__call__   s~    9~YQ;Q;Q<'D''3	Q;3s3	23c9!4>>9--t||Y''r   c                    t         | |       S r   rH   rE   s     r   rI   zLCSStr.similarity   rJ   r   N)r[   rN   r\   rN   rL   rN   rO   r>   rN   rL   r$   )rP   rQ   rR   rS   r_   ri   rF   rI   r5   r   r   r   r   i   s    1
(%r   r   c                  (    e Zd ZdZddZddZddZy)r   a"  Ratcliff-Obershelp similarity
    This follows the Ratcliff-Obershelp algorithm to derive a similarity
    measure:
        1. Find the length of the longest common substring in sequences.
        2. Recurse on the strings to the left & right of each this substring
           in sequences. The base case is a 0 length common substring, in which
           case, return 0. Otherwise, return the sum of the current longest
           common substring and the left & right recursed sums.
        3. Multiply this length by 2 and divide by the sum of the lengths of
           sequences.

    https://en.wikipedia.org/wiki/Gestalt_Pattern_Matching
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/ratcliff-obershelp.js
    https://xlinux.nist.gov/dads/HTML/ratcliffObershelp.html
    c                     y)Nr   r5   rE   s     r   maximumzRatcliffObershelp.maximum   s    r   c                     t               | }t        |      }|dk(  ry|D cg c]  }|d |j                  |        }}|D cg c]  }||j                  |      |z   d   }} | j                  | |z    | j                  | z   S c c}w c c}w )Nr   )r   r#   find_find)r   r>   rf   rd   r7   beforeafters          r   rt   zRatcliffObershelp._find   s    9%VQ;.78!OQVVF^$886?@166&>F*+,@@tzz6"V+jdjj%.@@@ 9@s   BBc                     | j                   | }||S t        |      }t        t        t        |            } | j                  | }| | j
                  | z  |z  S r   )quick_answerr#   sumrl   rD   rt   )r   r>   r0   scountecounts        r   rF   zRatcliffObershelp.__call__   sf    """I.MYSi()'D''3	


I..77r   Nrn   )r>   rN   rL   float)rP   rQ   rR   rS   rq   rt   rF   r5   r   r   r   r      s     A8r   r   )
__future__r   difflibr   rX   typingr   utilsr   baser	   _BaseSimilaritytypesr
   r!   ImportErrorr   __all__r   r   r   r   r   r   r5   r   r   <module>r      s    " 8    3 J%_ J%Z%%_ %%P$8 $8N 
	&( U  Es   A% %A54A5