Skip to content

Span in NER model error #4

Description

@patfol

The spans in the NER model are incorrect.

Code to reproduce:

from glob import glob
import pandas as pd
import re
from pprint import pprint
import pkg_resources

from pymedextcore.document import Document
from pymedext_eds.annotators import Endlines, SentenceTokenizer, SectionSplitter
from pymedext_eds.utils import rawtext_loader
from pymedext_eds.med import MedicationAnnotator, MedicationNormalizer

# --- Text pre-processing annotators ---------------------------------------
# Each stage names the annotation type(s) it reads and the type it emits:
# raw_text -> clean_text -> section -> sentence.
endlines = Endlines(["raw_text"], "clean_text", ID="endlines")
sections = SectionSplitter(["clean_text"], "section", ID="sections")
sentenceSplitter = SentenceTokenizer(["section"], "sentence", ID="sentences")

# --- Medication NER models -------------------------------------------------
# One entry per tagger: the path of the serialized model and the tag name
# passed along with it.
models_param = [
    {
        "tagger_path": "data/models/apmed5/entities/final-model.pt",
        "tag_name": "entity_pred",
    },
    {
        "tagger_path": "data/models/apmed5/events/final-model.pt",
        "tag_name": "event_pred",
    },
    {
        "tagger_path": "data/models/apmed5/drugblob/final-model.pt",
        "tag_name": "drugblob_pred",
    },
]

# NOTE(review): device is hard-coded to 'cuda:1' -- presumably requires a
# second GPU on the machine running this reproduction; confirm.
med = MedicationAnnotator(
    ["sentence"],
    "med",
    ID="med:v2",
    models_param=models_param,
    device="cuda:1",
)

# --- Medication normalizer -------------------------------------------------
# Uses the first '*.p' file found in the data/romedi directory of the
# installed pymedext_eds package (raises IndexError if none is found).
data_path = pkg_resources.resource_filename("pymedext_eds", "data/romedi")
romedi_candidates = glob(data_path + "/*.p")
romedi_path = romedi_candidates[0]

norm = MedicationNormalizer(
    ["ENT/DRUG", "ENT/CLASS"],
    "normalized_mention",
    ID="norm",
    romedi_path=romedi_path,
)

# Annotators are applied in this order.
pipeline = [endlines, sections, sentenceSplitter, med, norm]

# --- Load and annotate the demo documents ----------------------------------
# NOTE(review): glob() does not guarantee an ordering, so which file ends up
# as docs[0] may depend on the filesystem -- verify when reproducing.
data_path = pkg_resources.resource_filename("pymedext_eds", "data/demo")
file_list = glob(data_path + "/*.txt")

docs = list(map(rawtext_loader, file_list))

for doc in docs:
    doc.annotate(pipeline)

# --- Inspect the result -----------------------------------------------------
first_doc = docs[0]

# Values of every 'ENT/DRUG' annotation found in the first document.
[annotation.value for annotation in first_doc.get_annotations("ENT/DRUG")]

# Slice of the cleaned text at a hard-coded character range; presumably the
# span reported for one of the drug annotations above, to be compared with
# that annotation's value -- confirm against the issue description.
first_doc.get_annotations("clean_text")[0].value[5687:5691]

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions