3
.dk                 @   sf   d dl Zd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
jZG dd deZG dd	 d	eZdS )
    N)crf_log_likelihood)initializers)
BaseConfig)modelingc               @   sD   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZd
S )Config      d      g      ?gMbP?adamFTN   )__name__
__module____qualname__
batch_sizeZepochZprint_per_batchZclipZdropout_keep_problr	optimizerzeroslowernum_tagslstm_dimmax_seq_lenZ	max_epochZsteps_check r   r   @D:\pycharm\NER-BERT-BiLSTM-CRF--master\models\BERT_BiLSTM_CRF.pyr      s   r   c               @   sB   e Zd Zdd Zdd Zdd Zddd	Zdd
dZdddZdS )BertBiLSTMCrfc             C   s   || _ tjtjd d gdd| _tjtjd d gdd| _tjtjd d gdd| _tjtjd d gdd| _tjtjdd| _	tj
dd	d
| _tj
dd	d
| _tj | _| j  d S )N	input_ids)dtypeshapename
input_masksegment_idsZTargetsZDropout)r   r   r   F)Z	trainableg        )configtfplaceholderint32r   r   r    targetsfloat32dropoutZVariableglobal_stepZbest_dev_f1r   Zxavier_initializerinitializerbert_bilstm_crf)selfr!   r   r   r   __init__    s    
zBertBiLSTMCrf.__init__c             C   s~  t jt j| j}t j|dd}t j|t j| _t j| jd | _	t j| jd | _
| j }t jj|| j}| j|| jj| j}| j|| _| j| j| j| _| jj}t j }tj||\}}	t jj|| td g }
x<|D ]4}d}|j|	krd}n
|
j| td|j|j| qW | jj}|dkr<t jj| jj | _!nt"t j#| j|
}t j$|d	d
\}}| j!j%t&||
| j'd| _(d S )Nr   )Zreduction_indicesr   z**** Trainable Variables **** z, *INIT_FROM_CKPT*z  name = %s, shape = %s%sr   g      ?)Z	clip_norm)r(   ))r"   signabsr   Z
reduce_sumcastr$   lengthsr   r   	num_stepsbert_embeddingnnr'   biLSTM_layerr!   r   project_layerlogits
loss_layerZlossinit_checkpointZtrainable_variablesr   Z"get_assignment_map_from_checkpointtrainZinit_from_checkpointprintr   appendr   ZAdamOptimizerr   ZoptKeyErrorZ	gradientsZclip_by_global_normZapply_gradientszipr(   Ztrain_op)r+   ZusedZlength	embeddinglstm_inputslstm_outputsr:   ZtvarsZassignment_mapZinitialized_variable_namesZ
train_varsvarZinit_stringr   Zgrads_r   r   r   r*   0   s@    





zBertBiLSTMCrf.bert_bilstm_crfc             C   s8   t jj| jj}t j|d| j| j| jdd}|j	 }|S )NTF)r!   Zis_trainingr   r   Ztoken_type_idsZuse_one_hot_embeddings)
r   Z
BertConfigZfrom_json_filer!   Zbert_config_pathZ	BertModelr   r   r    Zget_sequence_output)r+   Zbert_configmodelr@   r   r   r   r4   a   s    zBertBiLSTMCrf.bert_embeddingNc       	      C   s   t j|sdn|j i }x:d
D ]2}t j| tj|d| jdd||< W dQ R X qW t jj|d |d |t j|d\}}W dQ R X t j|dd	S )z|
        :param lstm_inputs: [batch_size, num_steps, emb_size]
        :return: [batch_size, num_steps, 2*lstm_dim]
        Zchar_BiLSTMforwardbackwardT)Zuse_peepholesr)   Zstate_is_tupleN)r   Zsequence_length   )axis)rF   rG   )	r"   variable_scopernnZCoupledInputForgetGateLSTMCellr)   r5   Zbidirectional_dynamic_rnnr&   concat)	r+   rA   r   r2   r   Z	lstm_cell	directionZoutputsZfinal_statesr   r   r   r6   n   s     
zBertBiLSTMCrf.biLSTM_layerc             C   s(  t j|sdn|
 t jdz t jd| jjd | jjgt j| jd}t jd| jjgt jt j d}t j|d| jjd gd}t j	t j
j|||}W d	Q R X t jd
V t jd| jj| jjgt j| jd}t jd| jjgt jt j d}t j
j|||}W d	Q R X t j|d| j| jjgS Q R X d	S )z
        hidden layer between lstm layer and logits
        :param lstm_outputs: [batch_size, num_steps, emb_size]
        :return: [batch_size, num_steps, num_tags]
        ZprojecthiddenWrH   )r   r   r)   br   )r   Nr8   r.   r.   )r"   rJ   get_variabler!   r   r&   r)   Zzeros_initializerZreshapeZtanhr5   Z	xw_plus_br   r3   )r+   rB   r   rO   rP   outputrN   Zpredr   r   r   r7      s     zBertBiLSTMCrf.project_layerc       
      C   s&  t j|sdn| d
}t j|t j| jd| jjgd t j| jddgdgdd}t j|t j| j| j	dg t j
}t j||gdd}t j||gdd}t jt j| jjt j| jdg t j| jgdd}t jd| jjd | jjd g| jd| _t||| j|d d\}	| _t j|	 S Q R X d	S )zy
        calculate crf loss
        :param project_logits: [1, num_steps, num_tags]
        :return: scalar loss
        Zcrf_lossg     @@r   )r   )rI   Ztransitions)r   r)   )ZinputsZtag_indicesZtransition_paramsZsequence_lengthsNg     @r.   r.   r.   )r"   rJ   rL   Zonesr   r!   r   r   r1   r3   r&   r$   r%   rQ   r)   Ztransr   Zreduce_mean)
r+   Zproject_logitsr2   r   ZsmallZstart_logitsZ
pad_logitsr8   r%   Zlog_likelihoodr   r   r   r9      s(    .".zBertBiLSTMCrf.loss_layer)N)N)N)	r   r   r   r,   r*   r4   r6   r7   r9   r   r   r   r   r      s   1

r   )Z
tensorflowr"   Ztensorflow.contrib.crfr   Z'tensorflow.contrib.layers.python.layersr   Zmodels.base_configr   Zbertr   Zmodels.rnncellZrnncellrK   r   objectr   r   r   r   r   <module>   s   
