
    9e                        d dl mZ d dlmZ d dlmZmZ d dlmZm	Z	m
Z
mZ ddlmZmZ 	 d dlZg dZ ed	      Z G d
 de      Z G d de      Z e       Z e       Zy# e$ r dZY :w xY w)    )annotations)defaultdict)groupbyzip_longest)AnyIteratorSequenceTypeVar   )BaseBaseSimilarityN)MRAEditexmraeditexTc                  (    e Zd ZdZddZddZddZy)r   zWestern Airlines Surname Match Rating Algorithm comparison rating
    https://en.wikipedia.org/wiki/Match_rating_approach
    https://github.com/Yomguithereal/talisman/blob/master/src/metrics/mra.js
    c                    |D cg c]  }t        | j                  |             }}t        t        t        |            S c c}w N)list	_calc_mramaxmaplen)self	sequencesss      B/usr/lib/python3/dist-packages/textdistance/algorithms/phonetic.pymaximumzMRA.maximum    s;    6?@T$..+,@	@3sI&'' As   !Ac                    |s|S |j                         }|d   dj                  d |dd  D              z   }dj                  d t        |      D              }t        |      dkD  r|d d |dd  z   S |S )	Nr    c              3  *   K   | ]  }|d vs|  yw)AEIOUN ).0cs     r   	<genexpr>z MRA._calc_mra.<locals>.<genexpr>(   s      Iq8H Is   	r   c              3  &   K   | ]	  \  }}|  y wr   r$   )r%   char_s      r   r'   z MRA._calc_mra.<locals>.<genexpr>*   s     9at9s         )upperjoinr   r   )r   words     r   r   zMRA._calc_mra$   sw    Kzz|Aw IDH IIIww974=99t9q=8d23i''    c           	        t        |      sy|D cg c]  }t        | j                  |             }}t        t        t        |            }t	        |      }t        |      }t        |t        |      z
        |kD  ryt        |      D ]  }g }t        |      }t        | D ]#  }	 | j                  |	 r|j                  |	       % t        t        t        |       }t        ||t                     }
|
D cg c]  \  }}|||d  z    }}}t        t        t        |            } |s|S |t        |      z
  S c c}w c c}}w )Nr   )	fillvalue)allr   r   r   r   r   absminrangezip_identappendr   )r   r   r   lengthscount
max_lengthr*   new_sequencesminlencharssss1s2s                r   __call__zMRA.__call__/   s6   9~6?@T$..+,@	@s3	*+G\
zCL()E1u 	0AM\Fi 0"t{{E*!((/0  c=&9:M ]IHB8:;fb"bk);I;3sI./G	0 CL((/ A" <s   !D:8D?N)r   strreturnint)r0   rE   rF   rE   )__name__
__module____qualname____doc__r   r   rD   r$   r1   r   r   r      s    
(	)r1   r   c                     e Zd ZU dZ ed       ed       ed       ed       ed       ed       ed       ed	       ed
       ed      f
Zded<    ed      Z	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ	ddZ
ddZddZy)r   al  
    https://anhaidgroup.github.io/py_stringmatching/v0.3.x/Editex.html
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.14.3856&rep=rep1&type=pdf
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.18.2138&rep=rep1&type=pdf
    https://github.com/chrislit/blob/master/abydos/distance/_editex.py
    https://habr.com/ru/post/331174/ (RUS)
    AEIOUYBPCKQDTLRMNGJFPVSXZCSZtuple[frozenset[str], ...]groupsHWNc                   || _         t        || j                         | _        t        || j                        | _        || _        || _        ||t        d      || _        || _        t        j                  | j                   | _        y )Nz+`ungrouped` argument required with `groups`)
match_costr   
group_costmismatch_costlocalexternal
ValueErrorrX   	ungrouped	frozensetuniongrouped)r   r^   r[   r\   r]   rX   ra   r_   s           r   __init__zEditex.__init__b   sx     %j$//: @
   !NOO DK&DN 4r1   c                N    t        t        t        |            | j                  z  S r   )r   r   r   r]   )r   r   s     r   r   zEditex.maximumz   s    3sI&'$*<*<<<r1   c                
      j                   | r j                  S t        t         fd|            r j                  S  j
                  D ](  t        t        fd|            s j                  c S   j                  S )Nc                     | j                   vS r   )rd   )xr   s    r   <lambda>zEditex.r_cost.<locals>.<lambda>   s    Qdll2 r1   c                    | v S r   r$   )ri   groups    r   rj   zEditex.r_cost.<locals>.<lambda>   s    e r1   )r9   r[   anyr   r]   rX   r4   r\   )r   elementsrl   s   ` @r   r_costzEditex.r_cost}   sr    4;;!??"s2H=>%%%[[ 	'E3+X67&	' !!!r1   c                x     | j                   | s|d   | j                  v r| j                  S  | j                  | S )Nr   )r9   ra   r\   ro   )r   rn   s     r   d_costzEditex.d_cost   s:    t{{H%(1+*G??"t{{H%%r1   c                   | j                  ||      }||S | j                  ||      }d|j                         z   }d|j                         z   }t        |      dz
  }t        |      dz
  }t        r$t	        j
                  |dz   |dz   ft              }nt        d       }| j                  sAt        d|dz         D ]/  }||dz
     d   | j                  ||dz
     ||         z   ||   d<   1 t        d|dz         D ]/  }	|d   |	dz
     | j                  ||	dz
     ||	         z   |d   |	<   1 t        t        ||dd        d      D ]  \  }\  }
}t        t        ||dd        d      D ]r  \  }	\  }}t        ||dz
     |	   | j                  |
|      z   ||   |	dz
     | j                  ||      z   ||dz
     |	dz
     | j                  ||      z         ||   |	<   t  ||   |   }t        ||      S )N r   )dtypec                      t        t              S r   )r   rG   r$   r1   r   rj   z!Editex.__call__.<locals>.<lambda>   s    C(8 r1   r   )start)quick_answerr   r.   r   numpyzerosrG   r   r^   r7   rq   	enumerater8   r6   ro   )r   rB   rC   resultr=   len_s1len_s2d_matijcs1_prevcs1_currcs2_prevcs2_currdistances                  r   rD   zEditex.__call__   s*   ""2r*M \\"b)
288:288:R1R1KK!VaZ 8DE 89Ezz1fqj) N#AEl1oBq1uIr!u0MMaNq&1*% 	JA(1q5/DKK1q5	2a5,IIE!HQK	J (1RAB'J 	#A#(+4SRV_A+N ''Hh!!a%LOdkk(H&EE!HQUOdkk(H&EE!a%LQ'$++h*IIa	 =(8Z((r1   )Fr   r      NNT)r^   boolr[   rG   r\   rG   r]   rG   rX   rW   ra   zfrozenset[str]r_   r   rF   None)r   r	   rF   rG   )rn   rE   rF   rG   )rB   rE   rC   rE   rF   float)rH   rI   rJ   rK   rb   rX   __annotations__ra   re   r   ro   rq   rD   r$   r1   r   r   r   L   s     	($%$$$$%%%*F&  $I -1$(55 5 	5
 5 +5 "5 5 
50="&
#)r1   r   )
__future__r   collectionsr   	itertoolsr   r   typingr   r   r	   r
   baser   _Baser   _BaseSimilarityrx   ImportError__all__r   r   r   r   r   r$   r1   r   <module>r      sy    " $ * 3 3 C
 CL/)/ /)dc)U c)L 
e	I  Es   A A)(A)