B
    ŸM3i˜$  ã               @   s\   d dl Zd dlmZ d dlmZ d dlmZ d dlm	Z	 G dd„ deƒZ
G dd	„ d	eƒZdS )
é    N)Úcrf_log_likelihood)Úinitializers)Ú
BaseConfig)Úmodelingc               @   sH   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZd
S )ÚConfigé€   é   éd   é   g      à?gü©ñÒMbP?ÚadamFTNéÈ   )Ú__name__Ú
__module__Ú__qualname__Ú
batch_sizeZepochZprint_per_batchZclipZdropout_keep_probÚlrÚ	optimizerÚzerosÚlowerÚnum_tagsÚgru_dimÚattention_dimÚmax_seq_lenZ	max_epochZsteps_check© r   r   úU/home/tuu73405/projects/NER-BERT-BiLSTM-CRF--master/models/Bert_BiGR_Attention_CRF.pyr   
   s   r   c               @   sL   e Zd Zdd„ Zdd„ Zdd„ Zddd	„Zdd
d„Zddd„Zddd„Z	dS )ÚBertBiGRUAttentionCrfc             C   s®   || _ tjtjd d gdd| _tjtjd d gdd| _tjtjd d gdd| _tjtjd d gdd| _tjtjdd| _	tj
dd	d
| _tj
dd	d
| _t ¡ | _|  ¡  d S )NÚ	input_ids)ÚdtypeÚshapeÚnameÚ
input_maskÚsegment_idsZTargetsZDropout)r   r   r   F)Z	trainableg        )ÚconfigÚtfZplaceholderÚint32r   r    r!   ÚtargetsÚfloat32ÚdropoutZVariableÚglobal_stepZbest_dev_f1r   Zxavier_initializerÚinitializerÚbert_bigru_attention_crf)Úselfr"   r   r   r   Ú__init__   s    
zBertBiGRUAttentionCrf.__init__c             C   s’  t  t  | j¡¡}t j|dd}t  |t j¡| _t  | j¡d | _	t  | j¡d | _
|  ¡ }t j || j¡}|  || jj| j¡}|  || j¡}|  |¡| _|  | j| j¡| _| jj}t  ¡ }t ||¡\}	}
t j ||	¡ tdƒ g }xB|D ]:}d}|j|
krd}n
| |¡ td|j|j|f ƒ qìW | jj}|d	krPt j  | jj!¡| _"nt#‚t  $| j|¡}t j%|d
d\}}| j"j&t'||ƒ| j(d| _)d S )Nr   )Zreduction_indicesr   éÿÿÿÿz**** Trainable Variables ****Ú z, *INIT_FROM_CKPT*z  name = %s, shape = %s%sr   g      ð?)Z	clip_norm)r(   )*r#   ÚsignÚabsr   Ú
reduce_sumÚcastr$   Úlengthsr   r   Ú	num_stepsÚbert_embeddingÚnnr'   ÚbiGRU_layerr"   r   Úattention_layerÚproject_layerÚlogitsÚ
loss_layerZlossÚinit_checkpointZtrainable_variablesr   Z"get_assignment_map_from_checkpointÚtrainZinit_from_checkpointÚprintr   Úappendr   ZAdamOptimizerr   ZoptÚKeyErrorZ	gradientsZclip_by_global_normZapply_gradientsÚzipr(   Ztrain_op)r+   ZusedÚlengthÚ	embeddingÚ
gru_inputsÚgru_outputsZattention_outputsr<   ZtvarsZassignment_mapZinitialized_variable_namesZ
train_varsÚvarZinit_stringr   ZgradsÚ_r   r   r   r*   .   s@    



z.BertBiGRUAttentionCrf.bert_bigru_attention_crfc             C   s8   t j | jj¡}t j|d| j| j| jdd}| 	¡ }|S )NTF)r"   Zis_trainingr   r    Ztoken_type_idsZuse_one_hot_embeddings)
r   Z
BertConfigZfrom_json_filer"   Zbert_config_pathZ	BertModelr   r    r!   Zget_sequence_output)r+   Zbert_configÚmodelrC   r   r   r   r5   \   s    z$BertBiGRUAttentionCrf.bert_embeddingNc       	      C   s–   t  |sdn|¡p i }x@dD ]8}t  |¡$ t jjj|| jt  ¡ d||< W dQ R X qW t jj|d |d |t j|d\}}W dQ R X t j	|dd	S )
u)  
        åŒå‘GRUå±‚ï¼ˆæ›¿æ¢åŽŸBiLSTMï¼‰
        :param gru_inputs: [batch_size, num_steps, emb_size]
        :param gru_dim: å•å‘GRUéšè—ç»´åº¦
        :param lengths: [batch_size] åºåˆ—é•¿åº¦
        :param name: ä½œç”¨åŸŸåç§°
        :return: [batch_size, num_steps, 2*gru_dim]
        Z
char_BiGRU)ÚforwardÚbackward)Zkernel_initializerZbias_initializerNrI   rJ   )r   Zsequence_lengthé   )Úaxis)
r#   Úvariable_scoper6   Zrnn_cellZGRUCellr)   Úzeros_initializerZbidirectional_dynamic_rnnr&   Úconcat)	r+   rD   r   r3   r   Zgru_cellÚ	directionZoutputsZfinal_statesr   r   r   r7   h   s    	
z!BertBiGRUAttentionCrf.biGRU_layerc             C   s  t  |sdn|¡ú | ¡ d j}t jd|| jjg| jd}t jd| jjgt  ¡ d}t jd| jjg| jd}t  	|d|g¡}t  
t j |||¡¡}	t j t  	t  |	t  	|ddg¡¡d| jg¡¡}
t j|| jt jd}|
| }
|
t j|
dd	d
 }
t j|
dd}|| }|S Q R X dS )u6  
        åŠ æ€§Attentionå±‚ï¼ˆå¯¹GRUè¾“å‡ºåŠ æƒï¼‰
        :param inputs: [batch_size, num_steps, 2*gru_dim] GRUè¾“å‡º
        :param lengths: [batch_size] åºåˆ—é•¿åº¦ï¼ˆç”¨äºŽmask paddingï¼‰
        :param name: ä½œç”¨åŸŸåç§°
        :return: [batch_size, num_steps, 2*gru_dim] åŠ æƒåŽçš„è¾“å‡º
        Z	attentionr-   ÚW_a)r   r)   Úb_aÚu_ar   )Úmaxlenr   T)rL   ZkeepdimsrK   )rL   N)r#   rM   Z	get_shapeÚvalueÚget_variabler"   r   r)   rN   ÚreshapeÚtanhr6   Ú	xw_plus_bZsoftmaxÚmatmulr4   Zsequence_maskr&   r1   Zexpand_dims)r+   Úinputsr3   r   Zhidden_sizerQ   rR   rS   Zinputs_reshapeZtanh_outputZalphaÚmaskZalpha_expandZattention_outputr   r   r   r8   ‚   s0    


 z%BertBiGRUAttentionCrf.attention_layerc          
   C   s(  t  |sdn|¡
 t  d¡z t jd| jjd | jjgt j| jd}t jd| jjgt jt  ¡ d}t j|d| jjd gd}t  	t j
 |||¡¡}W d	Q R X t  d
¡V t jd| jj| jjgt j| jd}t jd| jjgt jt  ¡ d}t j
 |||¡}W d	Q R X t  |d| j| jjg¡S Q R X d	S )z{
        :param gru_outputs: [batch_size, num_steps, 2*gru_dim]
        :return: [batch_size, num_steps, num_tags]
        ZprojectÚhiddenÚWrK   )r   r   r)   Úbr-   )r   Nr:   )r#   rM   rV   r"   r   r&   r)   rN   rW   rX   r6   rY   r   r4   )r+   rE   r   r^   r_   Úoutputr]   Zpredr   r   r   r9   ¦   s     z#BertBiGRUAttentionCrf.project_layerc       
   	   C   s&  t  |sdn|¡ d}t j|t j| jd| jjgd t j| jddgdgdd}t  |t  | j| j	dg¡ t j
¡}t j||gdd}t j||gdd}t jt  | jjt  | jdg¡ t j¡| jgdd}t jd| jjd | jjd g| jd| _t||| j|d d	\}	| _t  |	 ¡S Q R X d
S )z^
        :param project_logits: [1, num_steps, num_tags]
        :return: scalar loss
        Zcrf_lossg     @Àr   )r   r-   )rL   Ztransitions)r   r)   )r[   Ztag_indicesZtransition_paramsZsequence_lengthsN)r#   rM   rO   Zonesr   r"   r   r   r2   r4   r&   r$   r%   rV   r)   Ztransr   Zreduce_mean)
r+   Zproject_logitsr3   r   ZsmallZstart_logitsZ
pad_logitsr:   r%   Zlog_likelihoodr   r   r   r;   Á   s(    .".z BertBiGRUAttentionCrf.loss_layer)N)N)N)N)
r   r   r   r,   r*   r5   r7   r8   r9   r;   r   r   r   r   r      s   .

$
r   )Z
tensorflowr#   Ztensorflow.contrib.crfr   Z'tensorflow.contrib.layers.python.layersr   Zmodels.base_configr   Zbertr   r   Úobjectr   r   r   r   r   Ú<module>   s   