B
    |N3i¨  ã               @   sx   d dl Zd dlZd dlmZ d dlmZmZm	Z	 d dl
mZ dd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Zdd„ Zdd„ ZdS )é    N)Úviterbi_decode)Ú
get_loggerÚtest_nerÚbio_to_json)Úinput_from_linec       
      C   s`   |\}}}}}| j t |¡| jt |¡| jt |¡| jdi}	|r\t |¡|	| j< |j|	| j< |	S )z…
    :param is_train: Flag, True for train batch
    :param batch: list train/evaluate data
    :return: structured data to feed
    g      ð?)Ú	input_idsÚnpÚasarrayÚ
input_maskÚsegment_idsÚdropoutÚtargetsÚdropout_keep_prob)
ÚmodelÚis_trainÚbatchÚconfigÚ_r   ÚcharsÚmaskÚtagsÚ	feed_dict© r   úE/home/tuu73405/projects/NER-BERT-BiLSTM-CRF--master/train_val_test.pyÚget_feed_dict
   s    
r   c             C   s    g }d}t  |g|j dg g¡}xxt| |ƒD ]j\}}|d|… }|t  |dg¡ }	t j||	gdd} t j|| gdd} t| |ƒ\}
}| |
dd… ¡ q.W |S )zÎ
    :param logits: [batch_size, num_steps, num_tags]float32, logits
    :param lengths: [batch_size]int32, real length of each sequence
    :param matrix: transaction matrix for inference
    :return:
    g     @Àr   Né   )Úaxis)r   r	   Únum_tagsÚzipÚonesÚconcatenater   Úappend)ÚlogitsÚlengthsÚmatrixr   ÚpathsÚsmallÚstartÚscoreÚlengthÚpadÚpathr   r   r   r   Údecode   s    r,   c          
      s  g }|j  ¡ }x| ¡ D ]ö}|d }|d }	t|d||ƒ}
|  |j|jg|
¡\}}t||||ƒ}x¬tt	|ƒƒD ]œ}g }|| d|| … }‡ fdd„|	| d|| … D ƒ}‡ fdd„|| d|| … D ƒ}x0t
|||ƒD ] \}}}| d	 |||g¡¡ qÞW | |¡ qpW qW |S )
z“
    :param sess: session  to run the model
    :param data: list of data
    :param id_to_tag: index to tag name
    :return: evaluate result
    r   éÿÿÿÿFNc                s   g | ]}ˆ t |ƒ ‘qS r   )Úint)Ú.0Úx)Ú	id_to_tagr   r   ú
<listcomp>E   s    zevaluate_.<locals>.<listcomp>r   c                s   g | ]}ˆ t |ƒ ‘qS r   )r.   )r/   r0   )r1   r   r   r2   F   s    ú )ÚtransÚevalÚ
iter_batchr   Úrunr#   r"   r,   ÚrangeÚlenr   r!   Újoin)Úsessr   Údata_managerr1   r   Úresultsr4   r   ÚstringsÚlabelsr   r#   ÚscoresÚbatch_pathsÚiÚresultÚstringZgoldÚpredÚcharr   )r1   r   Ú	evaluate_3   s"    
""rG   c             C   s¤   |  d |¡¡ t| ||||ƒ}t||jƒ}x|D ]}	|  |	¡ q2W t|d  ¡  ¡ d ƒ}
|dkr |j 	¡ }|
|kr˜t
 |j|
¡ 	¡  |  d |
¡¡ |
|kS d S )Nzevaluate:{}r   r-   Údevznew best dev f1 score:{:>.3f})ÚinfoÚformatrG   r   Úresult_pathÚfloatÚstripÚsplitÚbest_dev_f1r5   ÚtfÚassign)r;   r   Únamer<   r1   Úloggerr   Zner_resultsÚ
eval_linesÚlineÚf1Zbest_test_f1r   r   r   ÚevaluateM   s    

rW   c             C   s‚  t |jƒ}tjjt ¡ dd}t ¡ }d|j_|j	}tj
|d6}	tj |j¡}
|
r†tj |
j¡r†| d|
j ¡ | j |	|
j¡ n| d¡ |	 t ¡ ¡ | d¡ g }xÆt|jƒD ]¸}xˆ|jddD ]x}t| d||ƒ}|	 | j| j| jg|¡\}}}| |¡ ||j d	krÊ|| d
 }| d ||| |t |¡¡¡ g }qÊW t |	| d||||ƒ}|r¸|j!|	|j"|d q¸W W d Q R X d S )Né   )Úmax_to_keepT)r   z Reading model parameters from %sz$Created model with fresh parameters.zstart training)Úshuffler   r   z*iteration:{} step:{}/{}, NER loss:{:>9.6f}rH   )Úglobal_step)#r   Úlog_filerP   ÚtrainÚSaverÚglobal_variablesÚConfigProtoÚgpu_optionsÚallow_growthÚlen_dataÚSessionÚget_checkpoint_stateÚ	ckpt_pathÚcheckpoint_existsÚmodel_checkpoint_pathrI   ÚsaverÚrestorer7   Úglobal_variables_initializerr8   Úepochr6   r   r[   ÚlossÚtrain_opr!   Úprint_per_batchrJ   r   ÚmeanrW   ÚsaveÚcheckpoint_path)r   r   Útrain_managerÚdev_managerr1   rS   ri   Ú	tf_configÚsteps_per_epochr;   Úckptrm   rB   r   r   r[   Z
batch_lossr   Ú	iterationÚbestr   r   r   r]   ]   s6    



r]   c       	   	   C   s®   t |jƒ}t ¡ }d|j_tj|d~}tj |j	¡}| 
t ¡ ¡ | 
t ¡ ¡ tj ¡ }|rŒtj |j¡rŒ| d|j ¡ | ||j¡ t|| d||||ƒ W d Q R X d S )NT)r   z Reading model parameters from %sÚtest)r   r\   rP   r`   ra   rb   rd   r]   re   rf   r7   rk   Úlocal_variables_initializerr^   rg   rh   rI   rj   rW   )	r   r   Útest_managerr1   rS   ru   r;   rw   ri   r   r   r   rz   €   s    

rz   c          	      s.  t |jƒ}t ¡ }d|j_tj|dþ}tj |j	¡}| 
t ¡ ¡ | 
t ¡ ¡ tj ¡ }|rŒtj |j¡rŒ| d|j ¡ | ||j¡ x’tdƒ}	t|	|j|ƒ}
| j |¡}t| d|
|ƒ}| 
| j| jg|¡\}}t||||ƒ}‡ fdd„|d D ƒ}t|
d |d	d
… ƒ}t|d ƒ qŽW W d Q R X d S )NT)r   z Reading model parameters from %szinput sentence, please:Fc                s   g | ]}ˆ | ‘qS r   r   )r/   Úidx)r1   r   r   r2   ¦   s    zdemo.<locals>.<listcomp>r   r   r-   Úentities)r   r\   rP   r`   ra   rb   rd   r]   re   rf   r7   rk   r{   r^   rg   rh   rI   rj   Úinputr   Úmax_seq_lenr4   r5   r   r#   r"   r,   r   Úprint)r   r   r1   Ú	tag_to_idrS   ru   r;   rw   ri   rU   Úinputsr4   r   r#   r@   rA   r   rC   r   )r1   r   Údemo‘   s*    

r„   )Ú
tensorflowrP   Únumpyr   Útensorflow.contrib.crfr   Úutilsr   r   r   Údata_helperr   r   r,   rG   rW   r]   rz   r„   r   r   r   r   Ú<module>   s   #