B
    p(3ik  ã               @   sh   d dl Zd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ G dd„ deƒZG dd	„ d	eƒZdS )
é    N)Úcrf_log_likelihood)Úinitializers)Ú
BaseConfig)Úmodelingc               @   sD   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZd
S )ÚConfigé€   é   éd   é   g      à?gü©ñÒMbP?ÚadamFTNéÈ   )Ú__name__Ú
__module__Ú__qualname__Ú
batch_sizeZepochZprint_per_batchZclipZdropout_keep_probÚlrÚ	optimizerÚzerosÚlowerÚnum_tagsÚlstm_dimÚmax_seq_lenZ	max_epochZsteps_check© r   r   úM/home/tuu73405/projects/NER-BERT-BiLSTM-CRF--master/models/BERT_BiLSTM_CRF.pyr      s   r   c               @   sB   e Zd Zdd„ Zdd„ Zdd„ Zddd	„Zdd
d„Zddd„ZdS )ÚBertBiLSTMCrfc             C   s®   || _ tjtjd d gdd| _tjtjd d gdd| _tjtjd d gdd| _tjtjd d gdd| _tjtjdd| _	tj
dd	d
| _tj
dd	d
| _t ¡ | _|  ¡  d S )NÚ	input_ids)ÚdtypeÚshapeÚnameÚ
input_maskÚsegment_idsZTargetsZDropout)r   r   r   F)Z	trainableg        )ÚconfigÚtfZplaceholderÚint32r   r   r    ÚtargetsÚfloat32ÚdropoutZVariableÚglobal_stepZbest_dev_f1r   Zxavier_initializerÚinitializerÚbert_bilstm_crf)Úselfr!   r   r   r   Ú__init__    s    
zBertBiLSTMCrf.__init__c             C   s~  t  t  | j¡¡}t j|dd}t  |t j¡| _t  | j¡d | _	t  | j¡d | _
|  ¡ }t j || j¡}|  || jj| j¡}|  |¡| _|  | j| j¡| _| jj}t  ¡ }t ||¡\}}	t j ||¡ tdƒ g }
x<|D ]4}d}|j|	kröd}n
|
 |¡ td|j|j|ƒ qÞW | jj}|d	kr<t j | jj ¡| _!nt"‚t  #| j|
¡}t j$|d
d\}}| j!j%t&||
ƒ| j'd| _(d S )Nr   )Zreduction_indicesr   éÿÿÿÿz**** Trainable Variables ****Ú z, *INIT_FROM_CKPT*z  name = %s, shape = %s%sr   g      ð?)Z	clip_norm)r'   ))r"   ÚsignÚabsr   Z
reduce_sumÚcastr#   Úlengthsr   r   Ú	num_stepsÚbert_embeddingÚnnr&   ÚbiLSTM_layerr!   r   Úproject_layerÚlogitsÚ
loss_layerZlossÚinit_checkpointZtrainable_variablesr   Z"get_assignment_map_from_checkpointÚtrainZinit_from_checkpointÚprintr   Úappendr   ZAdamOptimizerr   ZoptÚKeyErrorZ	gradientsZclip_by_global_normZapply_gradientsÚzipr'   Ztrain_op)r*   ZusedÚlengthÚ	embeddingÚlstm_inputsÚlstm_outputsr9   ZtvarsZassignment_mapZinitialized_variable_namesZ
train_varsÚvarZinit_stringr   ZgradsÚ_r   r   r   r)   0   s@    





zBertBiLSTMCrf.bert_bilstm_crfc             C   s8   t j | jj¡}t j|d| j| j| jdd}| 	¡ }|S )NTF)r!   Zis_trainingr   r   Ztoken_type_idsZuse_one_hot_embeddings)
r   Z
BertConfigZfrom_json_filer!   Zbert_config_pathZ	BertModelr   r   r    Zget_sequence_output)r*   Zbert_configÚmodelr@   r   r   r   r3   a   s    zBertBiLSTMCrf.bert_embeddingNc       	      C   s   t  |sdn|¡j i }x:dD ]2}t  |¡ tj|d| jdd||< W dQ R X qW t jj|d |d |t j|d\}}W dQ R X t j|d	d
S )z|
        :param lstm_inputs: [batch_size, num_steps, emb_size]
        :return: [batch_size, num_steps, 2*lstm_dim]
        Zchar_BiLSTM)ÚforwardÚbackwardT)Zuse_peepholesr(   Zstate_is_tupleNrF   rG   )r   Zsequence_lengthé   )Úaxis)	r"   Úvariable_scopeÚrnnZCoupledInputForgetGateLSTMCellr(   r4   Zbidirectional_dynamic_rnnr%   Úconcat)	r*   rA   r   r1   r   Z	lstm_cellÚ	directionZoutputsZfinal_statesr   r   r   r5   n   s     
zBertBiLSTMCrf.biLSTM_layerc          
   C   s(  t  |sdn|¡
 t  d¡z t jd| jjd | jjgt j| jd}t jd| jjgt jt  ¡ d}t j|d| jjd gd}t  	t j
 |||¡¡}W d	Q R X t  d
¡V t jd| jj| jjgt j| jd}t jd| jjgt jt  ¡ d}t j
 |||¡}W d	Q R X t  |d| j| jjg¡S Q R X d	S )z®
        hidden layer between lstm layer and logits
        :param lstm_outputs: [batch_size, num_steps, emb_size]
        :return: [batch_size, num_steps, num_tags]
        ZprojectÚhiddenÚWrH   )r   r   r(   Úbr,   )r   Nr7   )r"   rJ   Úget_variabler!   r   r%   r(   Zzeros_initializerZreshapeZtanhr4   Z	xw_plus_br   r2   )r*   rB   r   rO   rP   ÚoutputrN   Zpredr   r   r   r6   „   s     zBertBiLSTMCrf.project_layerc       
   	   C   s&  t  |sdn|¡ d}t j|t j| jd| jjgd t j| jddgdgdd}t  |t  | j| j	dg¡ t j
¡}t j||gdd}t j||gdd}t jt  | jjt  | jdg¡ t j¡| jgdd}t jd| jjd | jjd g| jd| _t||| j|d d	\}	| _t  |	 ¡S Q R X d
S )zy
        calculate crf loss
        :param project_logits: [1, num_steps, num_tags]
        :return: scalar loss
        Zcrf_lossg     @Àr   )r   r,   )rI   Ztransitions)r   r(   )ZinputsZtag_indicesZtransition_paramsZsequence_lengthsN)r"   rJ   rL   Zonesr   r!   r   r   r0   r2   r%   r#   r$   rQ   r(   Ztransr   Zreduce_mean)
r*   Zproject_logitsr1   r   ZsmallZstart_logitsZ
pad_logitsr7   r$   Zlog_likelihoodr   r   r   r8       s(    .".zBertBiLSTMCrf.loss_layer)N)N)N)	r   r   r   r+   r)   r3   r5   r6   r8   r   r   r   r   r      s   1

r   )Z
tensorflowr"   Ztensorflow.contrib.crfr   Z'tensorflow.contrib.layers.python.layersr   Zmodels.base_configr   Zbertr   Zmodels.rnncellZrnncellrK   r   Úobjectr   r   r   r   r   Ú<module>   s   