o
    fü2i"  ã                   @   st   d dl Zd dlZd dlmZmZmZ d dlm	Z	 dd„ Z
dd„ Zdd	„ Zd
d„ Zdd„ Zdd„ Zdd„ Zdd„ ZdS )é    N)Ú
get_loggerÚtest_nerÚbio_to_json)Úinput_from_linec           
      C   s`   |\}}}}}| j t |¡| jt |¡| jt |¡| jdi}	|r.t |¡|	| j< |j|	| j< |	S )z…
    :param is_train: Flag, True for train batch
    :param batch: list train/evaluate data
    :return: structured data to feed
    g      ð?)Ú	input_idsÚnpÚasarrayÚ
input_maskÚsegment_idsÚdropoutÚtargetsÚdropout_keep_prob)
ÚmodelÚis_trainÚbatchÚconfigÚ_r
   ÚcharsÚmaskÚtagsÚ	feed_dict© r   úE/home/tuu73405/projects/NER-BERT-BiLSTM-CRF--master/train_val_test.pyÚget_feed_dict   s   ür   c           
      C   sî   | j \}}tj||ftjd}tj||ftjd}| d |d< td|ƒD ]%}tj||d  dd| }| | tj|dd ||< tj|dd||< q$t |d ¡g}t|d ddƒD ]}| 	|||d f ¡ qZ| 
¡  t |d ¡}	||	fS )u3  
    åŠŸèƒ½ï¼šæ›¿ä»£ tensorflow-addons çš„ viterbi_decodeï¼Œé€‚ç”¨äºŽæ‰€æœ‰ç‰ˆæœ¬
    å‚æ•°ï¼š
        score: å½¢çŠ¶ä¸º [seq_len, num_tags] çš„å‘å°„åˆ†æ•°çŸ©é˜µ
        transition_params: å½¢çŠ¶ä¸º [num_tags, num_tags] çš„è½¬ç§»åˆ†æ•°çŸ©é˜µ
    è¿”å›žï¼š
        (æœ€ä½³è·¯å¾„åˆ—è¡¨, è·¯å¾„æ€»åˆ†æ•°)
    )Údtyper   é   ©Úaxiséÿÿÿÿ)Úshaper   ÚzerosÚfloat32Úint32ÚrangeÚexpand_dimsÚmaxÚargmaxÚappendÚreverse)
ÚscoreÚtransition_paramsÚseq_lenÚnum_tagsZtrellisZbackpointersÚtZ
max_scoresZ	best_pathZ
best_scorer   r   r   Úviterbi_decode   s   

r.   c                 C   sœ   g }d}t  |g|j dg g¡}t| |ƒD ]5\}}|d|… }|t  |dg¡ }	t j||	gdd} t j|| gdd} t| |ƒ\}
}| |
dd… ¡ q|S )zÎ
    :param logits: [batch_size, num_steps, num_tags]float32, logits
    :param lengths: [batch_size]int32, real length of each sequence
    :param matrix: transaction matrix for inference
    :return:
    g     @Àr   Nr   r   )r   r   r,   ÚzipÚonesÚconcatenater.   r'   )ÚlogitsÚlengthsÚmatrixr   ÚpathsÚsmallÚstartr)   ÚlengthÚpadÚpathr   r   r   r   ÚdecodeA   s   r;   c              
      s
  g }|j  ¡ }| ¡ D ]w}|d }|d }	t|d||ƒ}
|  |j|jg|
¡\}}t||||ƒ}tt	|ƒƒD ]L}g }|| d|| … }‡ fdd„|	| d|| … D ƒ}‡ fdd„|| d|| … D ƒ}t
|||ƒD ]\}}}| d	 |||g¡¡ qk| |¡ q5q|S )
z“
    :param sess: session  to run the model
    :param data: list of data
    :param id_to_tag: index to tag name
    :return: evaluate result
    r   r   FNc                    ó   g | ]}ˆ t |ƒ ‘qS r   ©Úint©Ú.0Úx©Ú	id_to_tagr   r   Ú
<listcomp>i   ó    zevaluate_.<locals>.<listcomp>r   c                    r<   r   r=   r?   rB   r   r   rD   j   rE   Ú )ÚtransÚevalÚ
iter_batchr   Úrunr3   r2   r;   r#   Úlenr/   r'   Újoin)Úsessr   Údata_managerrC   r   ÚresultsrG   r   ÚstringsÚlabelsr   r3   ÚscoresÚbatch_pathsÚiÚresultÚstringZgoldÚpredÚcharr   rB   r   Ú	evaluate_W   s$   
""ùrY   c                 C   s    |  d |¡¡ t| ||||ƒ}t||jƒ}|D ]}	|  |	¡ qt|d  ¡  ¡ d ƒ}
|dkrN|j 	¡ }|
|krJt
 |j|
¡ 	¡  |  d |
¡¡ |
|kS d S )Nzevaluate:{}r   r   Údevznew best dev f1 score:{:>.3f})ÚinfoÚformatrY   r   Úresult_pathÚfloatÚstripÚsplitÚbest_dev_f1rH   ÚtfÚassign)rM   r   ÚnamerN   rC   Úloggerr   Zner_resultsÚ
eval_linesÚlineÚf1Zbest_test_f1r   r   r   Úevaluateq   s   
ûri   c                 C   sŽ  t |jƒ}tjjt ¡ dd}t ¡ }d|j_|j	}tj
|d}	tj |j¡}
|
rBtj |
j¡rB| d|
j ¡ | j |	|
j¡ n| d¡ |	 t ¡ ¡ | d¡ g }t|jƒD ]Z}|jddD ]<}t| d||ƒ}|	 | j| j| jg|¡\}}}| |¡ ||j d	krž|| d
 }| d ||| |t |¡¡¡ g }qbt |	| d||||ƒ}|r´|j!|	|j"|d qZW d   ƒ d S 1 sÀw   Y  d S )Né   )Úmax_to_keepT©r   ú Reading model parameters from %sz$Created model with fresh parameters.zstart training)Úshuffler   r   z*iteration:{} step:{}/{}, NER loss:{:>9.6f}rZ   )Úglobal_step)#r   Úlog_filerb   ÚtrainÚSaverÚglobal_variablesÚConfigProtoÚgpu_optionsÚallow_growthÚlen_dataÚSessionÚget_checkpoint_stateÚ	ckpt_pathÚcheckpoint_existsÚmodel_checkpoint_pathr[   ÚsaverÚrestorerJ   Úglobal_variables_initializerr#   ÚepochrI   r   ro   ÚlossÚtrain_opr'   Úprint_per_batchr\   r   Úmeanri   ÚsaveÚcheckpoint_path)r   r   Útrain_managerÚdev_managerrC   re   r}   Ú	tf_configÚsteps_per_epochrM   Úckptr   rT   r   r   ro   Z
batch_lossr   Ú	iterationÚbestr   r   r   rq      sD   



ÿÿ€€ò"õrq   c           	   	   C   sÄ   t |jƒ}t ¡ }d|j_tj|dE}tj |j	¡}| 
t ¡ ¡ | 
t ¡ ¡ tj ¡ }|rFtj |j¡rF| d|j ¡ | ||j¡ t|| d||||ƒ W d   ƒ d S 1 s[w   Y  d S )NTrl   rm   Útest)r   rp   rb   rt   ru   rv   rx   rq   ry   rz   rJ   r   Úlocal_variables_initializerrr   r{   r|   r[   r~   ri   )	r   r   Útest_managerrC   re   r‰   rM   r‹   r}   r   r   r   rŽ   ¤   s   

"örŽ   c                    s2  t |jƒ}t ¡ }d|j_tj|d|}tj |j	¡}| 
t ¡ ¡ | 
t ¡ ¡ tj ¡ }|rFtj |j¡rF| d|j ¡ | ||j¡ 	 tdƒ}	t|	|j|ƒ}
| j |¡}t| d|
|ƒ}| 
| j| jg|¡\}}t||||ƒ}‡ fdd„|d D ƒ}t|
d |d	d
… ƒ}t|d ƒ qG1 s’w   Y  d S )NTrl   rm   zinput sentence, please:Fc                    s   g | ]}ˆ | ‘qS r   r   )r@   ÚidxrB   r   r   rD   Ê   s    zdemo.<locals>.<listcomp>r   r   r   Úentities)r   rp   rb   rt   ru   rv   rx   rq   ry   rz   rJ   r   r   rr   r{   r|   r[   r~   Úinputr   Úmax_seq_lenrG   rH   r   r3   r2   r;   r   Úprint)r   r   rC   Ú	tag_to_idre   r‰   rM   r‹   r}   rg   ÚinputsrG   r   r3   rR   rS   r   rU   r   rB   r   Údemoµ   s.   

÷ör˜   )Ú
tensorflowrb   Únumpyr   Úutilsr   r   r   Údata_helperr   r   r.   r;   rY   ri   rq   rŽ   r˜   r   r   r   r   Ú<module>   s   ##