o
    $3i#1                     @   sd   d dl Zd dlZd dlmZ d dlmZ d dlm	Z
 d dlmZ G dd deZG dd deZdS )	    N)
BaseConfig)modeling)layersc                   @   sD   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZd
S )Config      d      g      ?gMbP?adamFTN   )__name__
__module____qualname__
batch_sizeZepochZprint_per_batchZclipZdropout_keep_problr	optimizerzeroslowernum_tagslstm_dimmax_seq_lenZ	max_epochZsteps_check r   r   M/home/tuu73405/projects/NER-BERT-BiLSTM-CRF--master/models/BERT_BiLSTM_CRF.pyr      s    r   c                   @   sB   e Zd Zdd Zdd Zdd Zddd	Zdd
dZdddZdS )BertBiLSTMCrfc                 C   s   || _ tjtjddgtjdtjddd| _tjtjddgtjdtjddd| _tjtjddgtjdtjddd| _tjtjddgtjdddd	| _	tjd
tj
ddd| _tjddd| _tjddd| _tjj | _|   d S )Nr   )shapedtype	input_idsF)initial_valuer   name	trainable
input_masksegment_idsZTargets)r   r   r         ?ZDropoutr   )r   g        )configtfZVariableonesint32r   r   r    r!   targetsfloat32dropoutglobal_stepZbest_dev_f1ZkerasZinitializersZGlorotUniforminitializerbert_bilstm_crf)selfr#   r   r   r   __init__$   sD   zBertBiLSTMCrf.__init__c                 C   st  t t | j}t |}t |t j| _t | jd | _	t | jd | _
|  }t j|| j}| || jj| j}| || _| | j| j| _| jj}t  }t||\}}	t j|| td g }
|D ]}d}|j|	v rxd}n|
| td|j|j| ql| jj}|dkrt j| jj | _!nt"t #| j|
}t j$|dd	\}}| j!j%t&||
| j'd
| _(d S )Nr   z**** Trainable Variables **** z, *INIT_FROM_CKPT*z  name = %s, shape = %s%sr
   r"   )Z	clip_norm)r*   ))r$   Zsignabsr   Z
reduce_sumcastr&   lengthsr   r   	num_stepsbert_embeddingnnr)   biLSTM_layerr#   r   project_layerlogits
loss_layerZlossinit_checkpointZtrainable_variablesr   Z"get_assignment_map_from_checkpointtrainZinit_from_checkpointprintr   appendr   ZAdamOptimizerr   ZoptKeyErrorZ	gradientsZclip_by_global_normZapply_gradientszipr*   Ztrain_op)r-   ZusedZlength	embeddinglstm_inputslstm_outputsr;   ZtvarsZassignment_mapZinitialized_variable_namesZ
train_varsvarZinit_stringr   Zgrads_r   r   r   r,   V   sF   


zBertBiLSTMCrf.bert_bilstm_crfc                 C   s8   t j| jj}t j|d| j| j| jdd}|	 }|S )NTF)r#   Zis_trainingr   r    Ztoken_type_idsZuse_one_hot_embeddings)
r   Z
BertConfigZfrom_json_filer#   Zbert_config_pathZ	BertModelr   r    r!   Zget_sequence_output)r-   Zbert_configmodelrA   r   r   r   r5      s   zBertBiLSTMCrf.bert_embeddingNc           	   
   C   s   t jj|sdn|F i }dD ]%}t jj| tj|d| jdd||< W d   n1 s0w   Y  qt jjjj|d |d |t j	|d\}}W d   n1 sTw   Y  t j
|d	d
S )z|
        :param lstm_inputs: [batch_size, num_steps, emb_size]
        :return: [batch_size, num_steps, 2*lstm_dim]
        Zchar_BiLSTM)forwardbackwardT)Zuse_peepholesr+   Zstate_is_tupleNrG   rH   )r   Zsequence_length   Zaxis)r$   compatv1variable_scopernnZCoupledInputForgetGateLSTMCellr+   r6   Zbidirectional_dynamic_rnnr(   concat)	r-   rB   r   r3   r   Z	lstm_cellZ	directionZoutputsZfinal_statesr   r   r   r7      s*   
zBertBiLSTMCrf.biLSTM_layerc              	   C   s  t jj|sdn| t jjdH t jjjd| jjd | jjgt j| jd}t jjjd| jjgt jt 	 d}t j
|d| jjd gd}t t jjj|||}W d	   n1 s^w   Y  t jjd
6 t jjjd| jj| jjgt j| jd}t jjjd| jjgt jt 	 d}t jjj|||}W d	   n1 sw   Y  t 
|d| j| jjgW  d	   S 1 sw   Y  d	S )z
        hidden layer between lstm layer and logits
        :param lstm_outputs: [batch_size, num_steps, emb_size]
        :return: [batch_size, num_steps, num_tags]
        ZprojecthiddenWrI   )r   r   r+   br/   r   Nr9   )r$   rK   rL   rM   get_variabler#   r   r(   r+   Zzeros_initializerZreshapeZtanhr6   Z	xw_plus_br   r4   )r-   rC   r   rQ   rR   outputrP   Zpredr   r   r   r8      s,   
	$zBertBiLSTMCrf.project_layerc           
   	   C   sD  t jj|sdn| d}t j|t j| jd| jjgd t j	| jddgdgdd}t 
|t | j| jdg t j}t j||gdd}t j||gdd}t jt 
| jjt | jdg t j| jgdd}t jjjd| jjd | jjd g| jd| _t||| j|d d	\}	| _t |	 W  d
   S 1 sw   Y  d
S )zy
        calculate crf loss
        :param project_logits: [1, num_steps, num_tags]
        :return: scalar loss
        Zcrf_lossg     @r   rS   r/   rJ   Ztransitions)r   r+   )ZinputsZtag_indicesZtransition_paramsZsequence_lengthsN)r$   rK   rL   rM   rO   r%   r   r#   r   r   r2   r4   r(   r&   r'   rT   r+   ZtransZcrf_log_likelihoodZreduce_mean)
r-   Zproject_logitsr3   r   ZsmallZstart_logitsZ
pad_logitsr9   r'   Zlog_likelihoodr   r   r   r:     s2   ."(
$zBertBiLSTMCrf.loss_layer)N)	r   r   r   r.   r,   r5   r7   r8   r:   r   r   r   r   r   #   s    21

ir   )Z
tensorflowr$   Ztensorflow_addonsZtfaZmodels.base_configr   Zbertr   Zmodels.rnncellZrnncellrN   Ztensorflow.kerasr   r   objectr   r   r   r   r   <module>   s   