
2021
(1)
Neural data-to-text generation with dynamic content planning.
Chen, K.; Li, F.; Hu, B.; Peng, W.; Chen, Q.; Yu, H.; and Xiang, Y.
Knowledge-Based Systems, 215: 106610. 2021.
Publisher: Elsevier
bibtex
@article{chen_neural_2021, title = {Neural data-to-text generation with dynamic content planning}, volume = {215}, journal = {Knowledge-Based Systems}, author = {Chen, Kai and Li, Fayuan and Hu, Baotian and Peng, Weihua and Chen, Qingcai and Yu, Hong and Xiang, Yang}, year = {2021}, note = {Publisher: Elsevier}, pages = {106610}, }
2020
(14)
Conversational machine comprehension: a literature review.
Gupta, S.; and Rawat, B. P. S.
arXiv preprint arXiv:2006.00671. 2020.
bibtex
@article{gupta_conversational_2020, title = {Conversational machine comprehension: a literature review}, shorttitle = {Conversational machine comprehension}, journal = {arXiv preprint arXiv:2006.00671}, author = {Gupta, Somil and Rawat, Bhanu Pratap Singh}, year = {2020}, }
Neural Data-to-Text Generation with Dynamic Content Planning.
Chen, K.; Li, F.; Hu, B.; Peng, W.; Chen, Q.; and Yu, H.
arXiv:2004.07426 [cs]. April 2020.
arXiv: 2004.07426
Paper
bibtex
abstract
@article{chen_neural_2020, title = {Neural {Data}-to-{Text} {Generation} with {Dynamic} {Content} {Planning}}, url = {http://arxiv.org/abs/2004.07426}, abstract = {Neural data-to-text generation models have achieved significant advancement in recent years. However, these models have two shortcomings: the generated texts tend to miss some vital information, and they often generate descriptions that are not consistent with the structured input data. To alleviate these problems, we propose a Neural data-to-text generation model with Dynamic content Planning, named NDP for abbreviation. The NDP can utilize the previously generated text to dynamically select the appropriate entry from the given structured data. We further design a reconstruction mechanism with a novel objective function that can reconstruct the whole entry of the used data sequentially from the hidden states of the decoder, which aids the accuracy of the generated text. Empirical results show that the NDP achieves superior performance over the state-of-the-art on ROTOWIRE dataset, in terms of relation generation (RG), content selection (CS), content ordering (CO) and BLEU metrics. The human evaluation result shows that the texts generated by the proposed NDP are better than the corresponding ones generated by NCP in most of time. And using the proposed reconstruction mechanism, the fidelity of the generated text can be further improved significantly.}, urldate = {2020-12-29}, journal = {arXiv:2004.07426 [cs]}, author = {Chen, Kai and Li, Fayuan and Hu, Baotian and Peng, Weihua and Chen, Qingcai and Yu, Hong}, month = apr, year = {2020}, note = {arXiv: 2004.07426}, keywords = {Computer Science - Computation and Language}, }
Neural data-to-text generation models have achieved significant advancement in recent years. However, these models have two shortcomings: the generated texts tend to miss some vital information, and they often generate descriptions that are not consistent with the structured input data. To alleviate these problems, we propose a Neural data-to-text generation model with Dynamic content Planning, named NDP for abbreviation. The NDP can utilize the previously generated text to dynamically select the appropriate entry from the given structured data. We further design a reconstruction mechanism with a novel objective function that can reconstruct the whole entry of the used data sequentially from the hidden states of the decoder, which aids the accuracy of the generated text. Empirical results show that the NDP achieves superior performance over the state-of-the-art on ROTOWIRE dataset, in terms of relation generation (RG), content selection (CS), content ordering (CO) and BLEU metrics. The human evaluation result shows that the texts generated by the proposed NDP are better than the corresponding ones generated by NCP in most of time. And using the proposed reconstruction mechanism, the fidelity of the generated text can be further improved significantly.
Neural data-to-text generation with dynamic content planning.
Chen, K.; Li, F.; Hu, B.; Peng, W.; Chen, Q.; Yu, H.; and Xiang, Y.
Knowledge-Based Systems, 106610. November 2020.
Paper
doi
bibtex
abstract
@article{chen_neural_2020-1, title = {Neural data-to-text generation with dynamic content planning}, issn = {0950-7051}, url = {http://www.sciencedirect.com/science/article/pii/S0950705120307395}, doi = {10.1016/j.knosys.2020.106610}, abstract = {Neural data-to-text generation models have achieved significant advancement in recent years. However, these models have two shortcomings: the generated texts tend to miss some vital information, and they often generate descriptions that are not consistent with the structured input data. To alleviate these problems, we propose a Neural data-to-text generation model with Dynamic content Planning, named NDP 2 2This work was completed in cooperation with Baidu Inc.for abbreviation. The NDP can utilize the previously generated text to dynamically select the appropriate entry from the given structured data. We further design a reconstruction mechanism with a novel objective function that can reconstruct the whole entry of the used data sequentially from the hidden states of the decoder, which aids the accuracy of the generated text. Empirical results show that the NDP achieves superior performance over the state-of-the-art on ROTOWIRE and NBAZHN datasets, in terms of relation generation (RG), content selection (CS), content ordering (CO) and BLEU metrics. The human evaluation result shows that the texts generated by the proposed NDP are better than the corresponding ones generated by NCP in most of time. And using the proposed reconstruction mechanism, the fidelity of the generated text can be further improved significantly.}, language = {en}, urldate = {2020-12-29}, journal = {Knowledge-Based Systems}, author = {Chen, Kai and Li, Fayuan and Hu, Baotian and Peng, Weihua and Chen, Qingcai and Yu, Hong and Xiang, Yang}, month = nov, year = {2020}, keywords = {Data-to-text, Dynamic content planning, Reconstruction mechanism}, pages = {106610}, }
Neural data-to-text generation models have achieved significant advancement in recent years. However, these models have two shortcomings: the generated texts tend to miss some vital information, and they often generate descriptions that are not consistent with the structured input data. To alleviate these problems, we propose a Neural data-to-text generation model with Dynamic content Planning, named NDP 2 2This work was completed in cooperation with Baidu Inc.for abbreviation. The NDP can utilize the previously generated text to dynamically select the appropriate entry from the given structured data. We further design a reconstruction mechanism with a novel objective function that can reconstruct the whole entry of the used data sequentially from the hidden states of the decoder, which aids the accuracy of the generated text. Empirical results show that the NDP achieves superior performance over the state-of-the-art on ROTOWIRE and NBAZHN datasets, in terms of relation generation (RG), content selection (CS), content ordering (CO) and BLEU metrics. The human evaluation result shows that the texts generated by the proposed NDP are better than the corresponding ones generated by NCP in most of time. And using the proposed reconstruction mechanism, the fidelity of the generated text can be further improved significantly.
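The dynamic planning step described above is easy to caricature in code. Below is a minimal, hypothetical PyTorch sketch (all class and parameter names are ours, not from the paper or its released code) of a planner that re-scores the encoded table entries against the current decoder state, so the selection distribution can shift as the text is generated:

```python
# Minimal sketch of NDP-style dynamic content selection; assumes table
# entries are already encoded as vectors. DynamicPlanner, entry_dim, and
# hidden_dim are illustrative names, not the authors' implementation.
import torch
import torch.nn as nn

class DynamicPlanner(nn.Module):
    def __init__(self, entry_dim, hidden_dim):
        super().__init__()
        self.score = nn.Linear(entry_dim + hidden_dim, 1)

    def forward(self, entries, decoder_state):
        # entries: (num_entries, entry_dim); decoder_state: (hidden_dim,)
        h = decoder_state.expand(entries.size(0), -1)
        logits = self.score(torch.cat([entries, h], dim=-1)).squeeze(-1)
        return torch.softmax(logits, dim=-1)  # distribution over table entries

planner = DynamicPlanner(entry_dim=8, hidden_dim=16)
entries = torch.randn(5, 8)     # five encoded table records
state = torch.randn(16)         # current decoder hidden state
print(planner(entries, state))  # selection weights for this generation step
```

Calling the planner once per decoding step, with the updated decoder state, is what makes the plan "dynamic" rather than fixed up front.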
Dynamic Data Selection for Curriculum Learning via Ability Estimation.
Lalor, J. P.; and Yu, H.
In Findings of the Association for Computational Linguistics: EMNLP 2020, pages 545–555, Online, November 2020. Association for Computational Linguistics
Paper
bibtex
abstract
@inproceedings{lalor_dynamic_2020, address = {Online}, title = {Dynamic {Data} {Selection} for {Curriculum} {Learning} via {Ability} {Estimation}}, url = {https://www.aclweb.org/anthology/2020.findings-emnlp.48}, abstract = {Curriculum learning methods typically rely on heuristics to estimate the difficulty of training examples or the ability of the model. In this work, we propose replacing difficulty heuristics with learned difficulty parameters. We also propose Dynamic Data selection for Curriculum Learning via Ability Estimation (DDaCLAE), a strategy that probes model ability at each training epoch to select the best training examples at that point. We show that models using learned difficulty and/or ability outperform heuristic-based curriculum learning models on the GLUE classification tasks.}, urldate = {2020-11-29}, booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EMNLP} 2020}, publisher = {Association for Computational Linguistics}, author = {Lalor, John P. and Yu, Hong}, month = nov, year = {2020}, pages = {545--555}, }
Curriculum learning methods typically rely on heuristics to estimate the difficulty of training examples or the ability of the model. In this work, we propose replacing difficulty heuristics with learned difficulty parameters. We also propose Dynamic Data selection for Curriculum Learning via Ability Estimation (DDaCLAE), a strategy that probes model ability at each training epoch to select the best training examples at that point. We show that models using learned difficulty and/or ability outperform heuristic-based curriculum learning models on the GLUE classification tasks.
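The selection rule itself is compact. Here is a hedged sketch of the epoch-wise filtering the abstract describes, with the ability estimate stubbed out as a fixed schedule (the paper probes the model each epoch to estimate it; `select_training_data` and the schedule are illustrative stand-ins):

```python
# Sketch of DDaCLAE-style data selection: train on items whose learned IRT
# difficulty does not exceed the current model ability estimate.
import numpy as np

def select_training_data(difficulties, ability):
    """Indices of examples the model is currently estimated to handle."""
    return np.where(difficulties <= ability)[0]

rng = np.random.default_rng(0)
difficulties = rng.normal(0.0, 1.0, size=1000)      # learned item difficulties
for epoch, ability in enumerate([-1.0, 0.0, 1.0]):  # stub: ability grows over training
    idx = select_training_data(difficulties, ability)
    print(f"epoch {epoch}: training on {len(idx)} of {len(difficulties)} examples")
```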
Generating Accurate Electronic Health Assessment from Medical Graph.
Yang, Z.; and Yu, H.
In Findings of the Association for Computational Linguistics: EMNLP 2020, pages 3764–3773, Online, November 2020. Association for Computational Linguistics
Paper
bibtex
abstract
@inproceedings{yang_generating_2020, address = {Online}, title = {Generating {Accurate} {Electronic} {Health} {Assessment} from {Medical} {Graph}}, url = {https://www.aclweb.org/anthology/2020.findings-emnlp.336}, abstract = {One of the fundamental goals of artificial intelligence is to build computer-based expert systems. Inferring clinical diagnoses to generate a clinical assessment during a patient encounter is a crucial step towards building a medical diagnostic system. Previous works were mainly based on either medical domain-specific knowledge, or patients' prior diagnoses and clinical encounters. In this paper, we propose a novel model for automated clinical assessment generation (MCAG). MCAG is built on an innovative graph neural network, where rich clinical knowledge is incorporated into an end-to-end corpus-learning system. Our evaluation results against physician generated gold standard show that MCAG significantly improves the BLEU and rouge score compared with competitive baseline models. Further, physicians' evaluation showed that MCAG could generate high-quality assessments.}, urldate = {2020-11-29}, booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EMNLP} 2020}, publisher = {Association for Computational Linguistics}, author = {Yang, Zhichao and Yu, Hong}, month = nov, year = {2020}, pages = {3764--3773}, }
One of the fundamental goals of artificial intelligence is to build computer-based expert systems. Inferring clinical diagnoses to generate a clinical assessment during a patient encounter is a crucial step towards building a medical diagnostic system. Previous works were mainly based on either medical domain-specific knowledge, or patients' prior diagnoses and clinical encounters. In this paper, we propose a novel model for automated clinical assessment generation (MCAG). MCAG is built on an innovative graph neural network, where rich clinical knowledge is incorporated into an end-to-end corpus-learning system. Our evaluation results against physician generated gold standard show that MCAG significantly improves the BLEU and rouge score compared with competitive baseline models. Further, physicians' evaluation showed that MCAG could generate high-quality assessments.
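As a rough illustration of the graph component, the toy layer below performs one round of mean-aggregation message passing over a small concept graph. It is a generic GNN step under our own assumptions, not MCAG's architecture:

```python
# One generic graph message-passing step: average neighbour features, then
# apply a learned transform. Adjacency and sizes are illustrative.
import torch
import torch.nn as nn

class GraphLayer(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.lin = nn.Linear(dim, dim)

    def forward(self, node_feats, adj):
        deg = adj.sum(dim=1, keepdim=True).clamp(min=1)  # avoid divide-by-zero
        return torch.relu(self.lin(adj @ node_feats / deg))

adj = torch.tensor([[0., 1., 1.],   # tiny stand-in "medical knowledge graph":
                    [1., 0., 0.],   # concept 0 linked to concepts 1 and 2
                    [1., 0., 0.]])
feats = torch.randn(3, 8)           # initial concept embeddings
print(GraphLayer(8)(feats, adj).shape)  # updated embeddings, (3, 8)
```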
BENTO: A Visual Platform for Building Clinical NLP Pipelines Based on CodaLab.
Jin, Y; Li, F; and Yu, H
In AMIA Fall Symposium, 2020.
bibtex
@inproceedings{jin_bento_2020, title = {{BENTO}: {A} {Visual} {Platform} for {Building} {Clinical} {NLP} {Pipelines} {Based} on {CodaLab}.}, booktitle = {{AMIA} {Fall} {Symposium}}, author = {Jin, Y and Li, F and Yu, H}, year = {2020}, }
Neural Multi-Task Learning for Adverse Drug Reaction Extraction.
Liu, F; and Yu, H
In AMIA Fall Symposium, 2020.
bibtex
@inproceedings{liu_neural_2020, title = {Neural {Multi}-{Task} {Learning} for {Adverse} {Drug} {Reaction} {Extraction}}, booktitle = {{AMIA} {Fall} {Symposium}}, author = {Liu, F and Yu, H}, year = {2020}, }
Bleeding Entity Recognition in Electronic Health Records: A Comprehensive Analysis of End-to-End Systems.
Mitra, A; Singh, B.; and Yu, H
In AMIA Fall Symposium, 2020.
bibtex
@inproceedings{mitra_bleeding_2020, title = {Bleeding {Entity} {Recognition} in {Electronic} {Health} {Records}: {A} {Comprehensive} {Analysis} of {End}-to-{End} {Systems}}, booktitle = {{AMIA} {Fall} {Symposium}}, author = {Mitra, A and Singh, BP and Yu, H}, year = {2020}, }
Inferring ADR causality by predicting the Naranjo Score from Clinical Notes.
Singh, B.; Jagannatha, A; Liu, F; and Yu, H
In AMIA Fall Symposium, 2020.
bibtex
@inproceedings{singh_inferring_2020, title = {Inferring {ADR} causality by predicting the {Naranjo} {Score} from {Clinical} {Notes}}, booktitle = {{AMIA} {Fall} {Symposium}}, author = {Singh, BP and Jagannatha, A and Liu, F and Yu, H}, year = {2020}, }
Learning Latent Space Representations to Predict Patient Outcomes: Model Development and Validation.
Rongali, S.; Rose, A. J.; McManus, D. D.; Bajracharya, A. S.; Kapoor, A.; Granillo, E.; and Yu, H.
Journal of Medical Internet Research, 22(3): e16374. 2020.
Publisher: JMIR Publications Inc., Toronto, Canada
Paper
doi
bibtex
abstract
@article{rongali_learning_2020, title = {Learning {Latent} {Space} {Representations} to {Predict} {Patient} {Outcomes}: {Model} {Development} and {Validation}}, volume = {22}, shorttitle = {Learning {Latent} {Space} {Representations} to {Predict} {Patient} {Outcomes}}, url = {https://www.jmir.org/2020/3/e16374/}, doi = {10.2196/16374}, abstract = {Background: Scalable and accurate health outcome prediction using electronic health record (EHR) data has gained much attention in research recently. Previous machine learning models mostly ignore relations between different types of clinical data (ie, laboratory components, International Classification of Diseases codes, and medications). Objective: This study aimed to model such relations and build predictive models using the EHR data from intensive care units. We developed innovative neural network models and compared them with the widely used logistic regression model and other state-of-the-art neural network models to predict the patient’s mortality using their longitudinal EHR data. Methods: We built a set of neural network models that we collectively called as long short-term memory (LSTM) outcome prediction using comprehensive feature relations or in short, CLOUT. Our CLOUT models use a correlational neural network model to identify a latent space representation between different types of discrete clinical features during a patient’s encounter and integrate the latent representation into an LSTM-based predictive model framework. In addition, we designed an ablation experiment to identify risk factors from our CLOUT models. Using physicians’ input as the gold standard, we compared the risk factors identified by both CLOUT and logistic regression models. Results: Experiments on the Medical Information Mart for Intensive Care-III dataset (selected patient population: 7537) show that CLOUT (area under the receiver operating characteristic curve=0.89) has surpassed logistic regression (0.82) and other baseline NN models (\<0.86). In addition, physicians’ agreement with the CLOUT-derived risk factor rankings was statistically significantly higher than the agreement with the logistic regression model. Conclusions: Our results support the applicability of CLOUT for real-world clinical use in identifying patients at high risk of mortality. Trial Registration: [J Med Internet Res 2020;22(3):e16374]}, language = {en}, number = {3}, urldate = {2020-04-07}, journal = {Journal of Medical Internet Research}, author = {Rongali, Subendhu and Rose, Adam J. and McManus, David D. and Bajracharya, Adarsha S. and Kapoor, Alok and Granillo, Edgard and Yu, Hong}, year = {2020}, pmid = {32202503 PMCID: PMC7136840}, note = {Company: Journal of Medical Internet Research Distributor: Journal of Medical Internet Research Institution: Journal of Medical Internet Research Label: Journal of Medical Internet Research Publisher: JMIR Publications Inc., Toronto, Canada}, pages = {e16374}, }
Background: Scalable and accurate health outcome prediction using electronic health record (EHR) data has gained much attention in research recently. Previous machine learning models mostly ignore relations between different types of clinical data (ie, laboratory components, International Classification of Diseases codes, and medications). Objective: This study aimed to model such relations and build predictive models using the EHR data from intensive care units. We developed innovative neural network models and compared them with the widely used logistic regression model and other state-of-the-art neural network models to predict the patient’s mortality using their longitudinal EHR data. Methods: We built a set of neural network models that we collectively called as long short-term memory (LSTM) outcome prediction using comprehensive feature relations or in short, CLOUT. Our CLOUT models use a correlational neural network model to identify a latent space representation between different types of discrete clinical features during a patient’s encounter and integrate the latent representation into an LSTM-based predictive model framework. In addition, we designed an ablation experiment to identify risk factors from our CLOUT models. Using physicians’ input as the gold standard, we compared the risk factors identified by both CLOUT and logistic regression models. Results: Experiments on the Medical Information Mart for Intensive Care-III dataset (selected patient population: 7537) show that CLOUT (area under the receiver operating characteristic curve=0.89) has surpassed logistic regression (0.82) and other baseline NN models (<0.86). In addition, physicians’ agreement with the CLOUT-derived risk factor rankings was statistically significantly higher than the agreement with the logistic regression model. Conclusions: Our results support the applicability of CLOUT for real-world clinical use in identifying patients at high risk of mortality. Trial Registration: [J Med Internet Res 2020;22(3):e16374]
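The CLOUT pipeline can be condensed to its two moving parts: fuse the per-timestep features from different clinical data types into one latent vector, then run an LSTM over the sequence to predict mortality. In the sketch below the fusion is a plain linear projection standing in for the paper's correlational neural network, and every size and name is illustrative:

```python
# Hedged sketch of a CLOUT-like model: latent fusion of feature types
# followed by an LSTM outcome predictor. Not the authors' implementation.
import torch
import torch.nn as nn

class CloutLike(nn.Module):
    def __init__(self, lab_dim, code_dim, latent_dim, hidden_dim):
        super().__init__()
        self.fuse = nn.Linear(lab_dim + code_dim, latent_dim)  # stand-in fusion
        self.lstm = nn.LSTM(latent_dim, hidden_dim, batch_first=True)
        self.out = nn.Linear(hidden_dim, 1)

    def forward(self, labs, codes):  # both: (batch, time, feature_dim)
        z = torch.tanh(self.fuse(torch.cat([labs, codes], dim=-1)))
        h, _ = self.lstm(z)
        return torch.sigmoid(self.out(h[:, -1]))  # risk at the final step

model = CloutLike(lab_dim=10, code_dim=10, latent_dim=16, hidden_dim=32)
print(model(torch.randn(2, 5, 10), torch.randn(2, 5, 10)).shape)  # (2, 1)
```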
Calibrating Structured Output Predictors for Natural Language Processing.
Jagannatha, A.; and Yu, H.
In 2020 Annual Conference of the Association for Computational Linguistics (ACL), 2020.
bibtex
abstract
@inproceedings{jagannatha_calibrating_2020, title = {Calibrating {Structured} {Output} {Predictors} for {Natural} {Language} {Processing}.}, abstract = {We address the problem of calibrating prediction confidence for output entities of interest in natural language processing (NLP) applications. It is important that NLP applications such as named entity recognition and question answering produce calibrated confidence scores for their predictions, especially if the system is to be deployed in a safety-critical domain such as healthcare. However, the output space of such structured prediction models is often too large to adapt binary or multi-class calibration methods directly. In this study, we propose a general calibration scheme for output entities of interest in neural-network based structured prediction models. Our proposed method can be used with any binary class calibration scheme and a neural network model. Additionally, we show that our calibration method can also be used as an uncertainty-aware, entity-specific decoding step to improve the performance of the underlying model at no additional training cost or data requirements. We show that our method outperforms current calibration techniques for named-entity-recognition, part-of-speech and question answering. We also improve our model's performance from our decoding step across several tasks and benchmark datasets. Our method improves the calibration and model performance on out-of-domain test scenarios as well.}, booktitle = {2020 {Annual} {Conference} of the {Association} for {Computational} {Linguistics} ({ACL})}, author = {Jagannatha, Abhyuday and Yu, Hong}, year = {2020}, }
We address the problem of calibrating prediction confidence for output entities of interest in natural language processing (NLP) applications. It is important that NLP applications such as named entity recognition and question answering produce calibrated confidence scores for their predictions, especially if the system is to be deployed in a safety-critical domain such as healthcare. However, the output space of such structured prediction models is often too large to adapt binary or multi-class calibration methods directly. In this study, we propose a general calibration scheme for output entities of interest in neural-network based structured prediction models. Our proposed method can be used with any binary class calibration scheme and a neural network model. Additionally, we show that our calibration method can also be used as an uncertainty-aware, entity-specific decoding step to improve the performance of the underlying model at no additional training cost or data requirements. We show that our method outperforms current calibration techniques for named-entity-recognition, part-of-speech and question answering. We also improve our model's performance from our decoding step across several tasks and benchmark datasets. Our method improves the calibration and model performance on out-of-domain test scenarios as well.
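As a concrete, simplified instance of plugging "any binary class calibration scheme" into this setting, the snippet below applies Platt scaling (a logistic regression fit on raw confidence scores) to per-entity confidences. Platt scaling is one admissible choice here, not necessarily the authors'; the data is synthetic:

```python
# Platt-scaling sketch: map raw, over-confident entity scores to calibrated
# probabilities by fitting a logistic regression on held-out outcomes.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
raw_scores = rng.uniform(size=500).reshape(-1, 1)  # model confidences per entity
# simulate an over-confident model: true correctness rate is score**2
correct = (rng.uniform(size=500) < raw_scores.ravel() ** 2).astype(int)

calibrator = LogisticRegression()
calibrator.fit(raw_scores, correct)                 # fit on held-out data
calibrated = calibrator.predict_proba(raw_scores)[:, 1]
print(calibrated[:5])  # calibrated P(entity prediction is correct)
```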
BENTO: A Visual Platform for Building Clinical NLP Pipelines Based on CodaLab.
Jin, Y.; Li, F.; and Yu, H.
In 2020 Annual Conference of the Association for Computational Linguistics (ACL), 2020.
bibtex
@inproceedings{jin_bento_2020-1, title = {{BENTO}: {A} {Visual} {Platform} for {Building} {Clinical} {NLP} {Pipelines} {Based} on {CodaLab}.}, booktitle = {2020 {Annual} {Conference} of the {Association} for {Computational} {Linguistics} ({ACL})}, author = {Jin, Yonghao and Li, Fei and Yu, Hong}, year = {2020}, }
ICD Coding from Clinical Text Using Multi-Filter Residual Convolutional Neural Network.
Li, F.; and Yu, H.
In The Thirty-Fourth AAAI Conference on Artificial Intelligence (AAAI-20), 2020.
bibtex
@inproceedings{li_icd_2020, title = {{ICD} {Coding} from {Clinical} {Text} {Using} {Multi}-{Filter} {Residual} {Convolutional} {Neural} {Network}}, booktitle = {The {Thirty}-{Fourth} {AAAI} {Conference} on {Artificial} {Intelligence} ({AAAI}-20)}, author = {Li, Fei and Yu, Hong}, year = {2020}, }
Generating Medical Assessments Using a Neural Network Model: Algorithm Development and Validation.
Hu, B.; Bajracharya, A.; and Yu, H.
JMIR Medical Informatics, 8(1): e14971. 2020.
Publisher: JMIR Publications Inc., Toronto, Canada
Paper
doi
bibtex
abstract
@article{hu_generating_2020, title = {Generating {Medical} {Assessments} {Using} a {Neural} {Network} {Model}: {Algorithm} {Development} and {Validation}}, volume = {8}, copyright = {Unless stated otherwise, all articles are open-access distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work (}, shorttitle = {Generating {Medical} {Assessments} {Using} a {Neural} {Network} {Model}}, url = {https://medinform.jmir.org/2020/1/e14971/}, doi = {10.2196/14971}, abstract = {Background: Since its inception, artificial intelligence has aimed to use computers to help make clinical diagnoses. Evidence-based medical reasoning is important for patient care. Inferring clinical diagnoses is a crucial step during the patient encounter. Previous works mainly used expert systems or machine learning–based methods to predict the International Classification of Diseases - Clinical Modification codes based on electronic health records. We report an alternative approach: inference of clinical diagnoses from patients’ reported symptoms and physicians’ clinical observations. Objective: We aimed to report a natural language processing system for generating medical assessments based on patient information described in the electronic health record (EHR) notes. Methods: We processed EHR notes into the Subjective, Objective, Assessment, and Plan sections. We trained a neural network model for medical assessment generation (N2MAG). Our N2MAG is an innovative deep neural model that uses the Subjective and Objective sections of an EHR note to automatically generate an “expert-like” assessment of the patient. N2MAG can be trained in an end-to-end fashion and does not require feature engineering and external knowledge resources. Results: We evaluated N2MAG and the baseline models both quantitatively and qualitatively. Evaluated by both the Recall-Oriented Understudy for Gisting Evaluation metrics and domain experts, our results show that N2MAG outperformed the existing state-of-the-art baseline models. Conclusions: N2MAG could generate a medical assessment from the Subject and Objective section descriptions in EHR notes. Future work will assess its potential for providing clinical decision support. [JMIR Med Inform 2020;8(1):e14971]}, language = {en}, number = {1}, urldate = {2020-04-07}, journal = {JMIR Medical Informatics}, author = {Hu, Baotian and Bajracharya, Adarsha and Yu, Hong}, year = {2020}, note = {Company: JMIR Medical Informatics Distributor: JMIR Medical Informatics Institution: JMIR Medical Informatics Label: JMIR Medical Informatics Publisher: JMIR Publications Inc., Toronto, Canada}, pages = {e14971}, }
Background: Since its inception, artificial intelligence has aimed to use computers to help make clinical diagnoses. Evidence-based medical reasoning is important for patient care. Inferring clinical diagnoses is a crucial step during the patient encounter. Previous works mainly used expert systems or machine learning–based methods to predict the International Classification of Diseases - Clinical Modification codes based on electronic health records. We report an alternative approach: inference of clinical diagnoses from patients’ reported symptoms and physicians’ clinical observations. Objective: We aimed to report a natural language processing system for generating medical assessments based on patient information described in the electronic health record (EHR) notes. Methods: We processed EHR notes into the Subjective, Objective, Assessment, and Plan sections. We trained a neural network model for medical assessment generation (N2MAG). Our N2MAG is an innovative deep neural model that uses the Subjective and Objective sections of an EHR note to automatically generate an “expert-like” assessment of the patient. N2MAG can be trained in an end-to-end fashion and does not require feature engineering and external knowledge resources. Results: We evaluated N2MAG and the baseline models both quantitatively and qualitatively. Evaluated by both the Recall-Oriented Understudy for Gisting Evaluation metrics and domain experts, our results show that N2MAG outperformed the existing state-of-the-art baseline models. Conclusions: N2MAG could generate a medical assessment from the Subject and Objective section descriptions in EHR notes. Future work will assess its potential for providing clinical decision support. [JMIR Med Inform 2020;8(1):e14971]
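At its core, this is sequence-to-sequence generation from the Subjective and Objective sections to an Assessment. The bare-bones encoder-decoder below conveys only the shape of such a model; all sizes are illustrative and N2MAG itself is considerably richer:

```python
# Tiny GRU encoder-decoder: encode the S+O text, decode assessment tokens
# with teacher forcing. Purely illustrative of the task setup.
import torch
import torch.nn as nn

class TinySeq2Seq(nn.Module):
    def __init__(self, vocab, dim=32):
        super().__init__()
        self.emb = nn.Embedding(vocab, dim)
        self.enc = nn.GRU(dim, dim, batch_first=True)
        self.dec = nn.GRU(dim, dim, batch_first=True)
        self.out = nn.Linear(dim, vocab)

    def forward(self, src, tgt):
        _, h = self.enc(self.emb(src))     # summary of the S+O sections
        y, _ = self.dec(self.emb(tgt), h)  # teacher-forced decoding
        return self.out(y)                 # next-token logits

model = TinySeq2Seq(vocab=100)
logits = model(torch.randint(0, 100, (2, 12)), torch.randint(0, 100, (2, 6)))
print(logits.shape)  # (2, 6, 100)
```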
2019
(19)
Fine-Tuning Bidirectional Encoder Representations From Transformers (BERT)–Based Models on Large-Scale Electronic Health Record Notes: An Empirical Study.
Li, F.; Jin, Y.; Liu, W.; Rawat, B. P. S.; Cai, P.; and Yu, H.
JMIR Medical Informatics, 7(3): e14830. September 2019.
Paper
doi
bibtex
@article{li_fine-tuning_2019, title = {Fine-{Tuning} {Bidirectional} {Encoder} {Representations} {From} {Transformers} ({BERT})–{Based} {Models} on {Large}-{Scale} {Electronic} {Health} {Record} {Notes}: {An} {Empirical} {Study}}, volume = {7}, issn = {2291-9694}, shorttitle = {Fine-{Tuning} {Bidirectional} {Encoder} {Representations} {From} {Transformers} ({BERT})–{Based} {Models} on {Large}-{Scale} {Electronic} {Health} {Record} {Notes}}, url = {http://medinform.jmir.org/2019/3/e14830/}, doi = {10.2196/14830}, language = {en}, number = {3}, urldate = {2019-10-07}, journal = {JMIR Medical Informatics}, author = {Li, Fei and Jin, Yonghao and Liu, Weisong and Rawat, Bhanu Pratap Singh and Cai, Pengshan and Yu, Hong}, month = sep, year = {2019}, pmid = {31516126 PMCID: PMC6746103}, pages = {e14830}, }
Detecting Hypoglycemia Incidents Reported in Patients’ Secure Messages: Using Cost-Sensitive Learning and Oversampling to Reduce Data Imbalance.
Chen, J.; Lalor, J.; Liu, W.; Druhl, E.; Granillo, E.; Vimalananda, V. G; and Yu, H.
Journal of Medical Internet Research, 21(3). March 2019.
Paper
doi
bibtex
abstract
@article{chen_detecting_2019, title = {Detecting {Hypoglycemia} {Incidents} {Reported} in {Patients}’ {Secure} {Messages}: {Using} {Cost}-{Sensitive} {Learning} and {Oversampling} to {Reduce} {Data} {Imbalance}}, volume = {21}, issn = {1439-4456}, shorttitle = {Detecting {Hypoglycemia} {Incidents} {Reported} in {Patients}’ {Secure} {Messages}}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6431826/}, doi = {10.2196/11990}, abstract = {Background Improper dosing of medications such as insulin can cause hypoglycemic episodes, which may lead to severe morbidity or even death. Although secure messaging was designed for exchanging nonurgent messages, patients sometimes report hypoglycemia events through secure messaging. Detecting these patient-reported adverse events may help alert clinical teams and enable early corrective actions to improve patient safety. Objective We aimed to develop a natural language processing system, called HypoDetect (Hypoglycemia Detector), to automatically identify hypoglycemia incidents reported in patients’ secure messages. Methods An expert in public health annotated 3000 secure message threads between patients with diabetes and US Department of Veterans Affairs clinical teams as containing patient-reported hypoglycemia incidents or not. A physician independently annotated 100 threads randomly selected from this dataset to determine interannotator agreement. We used this dataset to develop and evaluate HypoDetect. HypoDetect incorporates 3 machine learning algorithms widely used for text classification: linear support vector machines, random forest, and logistic regression. We explored different learning features, including new knowledge-driven features. Because only 114 (3.80\%) messages were annotated as positive, we investigated cost-sensitive learning and oversampling methods to mitigate the challenge of imbalanced data. Results The interannotator agreement was Cohen kappa=.976. Using cross-validation, logistic regression with cost-sensitive learning achieved the best performance (area under the receiver operating characteristic curve=0.954, sensitivity=0.693, specificity 0.974, F1 score=0.590). Cost-sensitive learning and the ensembled synthetic minority oversampling technique improved the sensitivity of the baseline systems substantially (by 0.123 to 0.728 absolute gains). Our results show that a variety of features contributed to the best performance of HypoDetect. Conclusions Despite the challenge of data imbalance, HypoDetect achieved promising results for the task of detecting hypoglycemia incidents from secure messages. The system has a great potential to facilitate early detection and treatment of hypoglycemia.}, number = {3}, urldate = {2019-12-29}, journal = {Journal of Medical Internet Research}, author = {Chen, Jinying and Lalor, John and Liu, Weisong and Druhl, Emily and Granillo, Edgard and Vimalananda, Varsha G and Yu, Hong}, month = mar, year = {2019}, pmid = {30855231 PMCID: PMC6431826}, }
Background Improper dosing of medications such as insulin can cause hypoglycemic episodes, which may lead to severe morbidity or even death. Although secure messaging was designed for exchanging nonurgent messages, patients sometimes report hypoglycemia events through secure messaging. Detecting these patient-reported adverse events may help alert clinical teams and enable early corrective actions to improve patient safety. Objective We aimed to develop a natural language processing system, called HypoDetect (Hypoglycemia Detector), to automatically identify hypoglycemia incidents reported in patients’ secure messages. Methods An expert in public health annotated 3000 secure message threads between patients with diabetes and US Department of Veterans Affairs clinical teams as containing patient-reported hypoglycemia incidents or not. A physician independently annotated 100 threads randomly selected from this dataset to determine interannotator agreement. We used this dataset to develop and evaluate HypoDetect. HypoDetect incorporates 3 machine learning algorithms widely used for text classification: linear support vector machines, random forest, and logistic regression. We explored different learning features, including new knowledge-driven features. Because only 114 (3.80%) messages were annotated as positive, we investigated cost-sensitive learning and oversampling methods to mitigate the challenge of imbalanced data. Results The interannotator agreement was Cohen kappa=.976. Using cross-validation, logistic regression with cost-sensitive learning achieved the best performance (area under the receiver operating characteristic curve=0.954, sensitivity=0.693, specificity 0.974, F1 score=0.590). Cost-sensitive learning and the ensembled synthetic minority oversampling technique improved the sensitivity of the baseline systems substantially (by 0.123 to 0.728 absolute gains). Our results show that a variety of features contributed to the best performance of HypoDetect. Conclusions Despite the challenge of data imbalance, HypoDetect achieved promising results for the task of detecting hypoglycemia incidents from secure messages. The system has a great potential to facilitate early detection and treatment of hypoglycemia.
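The cost-sensitive ingredient is straightforward to demonstrate with scikit-learn: weight mistakes on the rare positive class more heavily during training. The snippet below uses synthetic features at roughly the paper's 3.8% positive rate; the feature pipeline is a stand-in, not HypoDetect's:

```python
# Cost-sensitive logistic regression on an imbalanced dataset:
# class_weight="balanced" reweights classes inversely to their frequency.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(3000, 20))                    # stand-in message features
y = (rng.uniform(size=3000) < 0.038).astype(int)   # ~3.8% positives, as reported

clf = LogisticRegression(class_weight="balanced", max_iter=1000)
clf.fit(X, y)
print(clf.predict_proba(X[:3])[:, 1])  # hypoglycemia-incident probabilities
```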
Automatic Detection of Hypoglycemic Events From the Electronic Health Record Notes of Diabetes Patients: Empirical Study.
Jin, Y.; Li, F.; Vimalananda, V. G.; and Yu, H.
JMIR Medical Informatics, 7(4): e14340. 2019.
Paper
doi
bibtex
abstract
@article{jin_automatic_2019, title = {Automatic {Detection} of {Hypoglycemic} {Events} {From} the {Electronic} {Health} {Record} {Notes} of {Diabetes} {Patients}: {Empirical} {Study}}, volume = {7}, copyright = {Unless stated otherwise, all articles are open-access distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work (}, shorttitle = {Automatic {Detection} of {Hypoglycemic} {Events} {From} the {Electronic} {Health} {Record} {Notes} of {Diabetes} {Patients}}, url = {https://medinform.jmir.org/2019/4/e14340/}, doi = {10.2196/14340}, abstract = {Background: Hypoglycemic events are common and potentially dangerous conditions among patients being treated for diabetes. Automatic detection of such events could improve patient care and is valuable in population studies. Electronic health records (EHRs) are valuable resources for the detection of such events. Objective: In this study, we aim to develop a deep-learning–based natural language processing (NLP) system to automatically detect hypoglycemic events from EHR notes. Our model is called the High-Performing System for Automatically Detecting Hypoglycemic Events (HYPE). Methods: Domain experts reviewed 500 EHR notes of diabetes patients to determine whether each sentence contained a hypoglycemic event or not. We used this annotated corpus to train and evaluate HYPE, the high-performance NLP system for hypoglycemia detection. We built and evaluated both a classical machine learning model (ie, support vector machines [SVMs]) and state-of-the-art neural network models. Results: We found that neural network models outperformed the SVM model. The convolutional neural network (CNN) model yielded the highest performance in a 10-fold cross-validation setting: mean precision=0.96 (SD 0.03), mean recall=0.86 (SD 0.03), and mean F1=0.91 (SD 0.03). Conclusions: Despite the challenges posed by small and highly imbalanced data, our CNN-based HYPE system still achieved a high performance for hypoglycemia detection. HYPE can be used for EHR-based hypoglycemia surveillance and population studies in diabetes patients. [JMIR Med Inform 2019;7(4):e14340]}, language = {en}, number = {4}, urldate = {2019-11-10}, journal = {JMIR Medical Informatics}, author = {Jin, Yonghao and Li, Fei and Vimalananda, Varsha G. and Yu, Hong}, year = {2019}, pmid = {31702562 PMCID: PMC6913754}, keywords = {adverse events, convolutional neural networks, hypoglycemia, natural language processing}, pages = {e14340}, }
Background: Hypoglycemic events are common and potentially dangerous conditions among patients being treated for diabetes. Automatic detection of such events could improve patient care and is valuable in population studies. Electronic health records (EHRs) are valuable resources for the detection of such events. Objective: In this study, we aim to develop a deep-learning–based natural language processing (NLP) system to automatically detect hypoglycemic events from EHR notes. Our model is called the High-Performing System for Automatically Detecting Hypoglycemic Events (HYPE). Methods: Domain experts reviewed 500 EHR notes of diabetes patients to determine whether each sentence contained a hypoglycemic event or not. We used this annotated corpus to train and evaluate HYPE, the high-performance NLP system for hypoglycemia detection. We built and evaluated both a classical machine learning model (ie, support vector machines [SVMs]) and state-of-the-art neural network models. Results: We found that neural network models outperformed the SVM model. The convolutional neural network (CNN) model yielded the highest performance in a 10-fold cross-validation setting: mean precision=0.96 (SD 0.03), mean recall=0.86 (SD 0.03), and mean F1=0.91 (SD 0.03). Conclusions: Despite the challenges posed by small and highly imbalanced data, our CNN-based HYPE system still achieved a high performance for hypoglycemia detection. HYPE can be used for EHR-based hypoglycemia surveillance and population studies in diabetes patients. [JMIR Med Inform 2019;7(4):e14340]
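A minimal CNN sentence classifier in the spirit of the best-performing model might look like the following; kernel size, filter count, and embedding dimension are illustrative assumptions, not HYPE's configuration:

```python
# Convolution + max-over-time pooling for binary sentence classification.
import torch
import torch.nn as nn

class SentenceCNN(nn.Module):
    def __init__(self, vocab_size, emb_dim=50, num_filters=32, kernel=3):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.conv = nn.Conv1d(emb_dim, num_filters, kernel)
        self.out = nn.Linear(num_filters, 2)  # hypoglycemic event vs. not

    def forward(self, tokens):                # tokens: (batch, seq_len)
        x = self.emb(tokens).transpose(1, 2)  # (batch, emb_dim, seq_len)
        x = torch.relu(self.conv(x)).max(dim=2).values  # max-over-time pooling
        return self.out(x)

model = SentenceCNN(vocab_size=1000)
print(model(torch.randint(0, 1000, (4, 20))).shape)  # (4, 2)
```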
Learning to detect and understand drug discontinuation events from clinical narratives.
Liu, F.; Pradhan, R.; Druhl, E.; Freund, E.; Liu, W.; Sauer, B. C.; Cunningham, F.; Gordon, A. J.; Peters, C. B.; and Yu, H.
Journal of the American Medical Informatics Association, 26(10): 943–951. October 2019.
Paper
doi
bibtex
abstract
@article{liu_learning_2019, title = {Learning to detect and understand drug discontinuation events from clinical narratives}, volume = {26}, url = {https://academic.oup.com/jamia/article/26/10/943/5481540}, doi = {10.1093/jamia/ocz048}, abstract = {AbstractObjective. Identifying drug discontinuation (DDC) events and understanding their reasons are important for medication management and drug safety survei}, language = {en}, number = {10}, urldate = {2019-12-29}, journal = {Journal of the American Medical Informatics Association}, author = {Liu, Feifan and Pradhan, Richeek and Druhl, Emily and Freund, Elaine and Liu, Weisong and Sauer, Brian C. and Cunningham, Fran and Gordon, Adam J. and Peters, Celena B. and Yu, Hong}, month = oct, year = {2019}, pmid = {31034028 PMCID: PMC6748801}, pages = {943--951}, }
Objective: Identifying drug discontinuation (DDC) events and understanding their reasons are important for medication management and drug safety surveillance…
Overview of the First Natural Language Processing Challenge for Extracting Medication, Indication, and Adverse Drug Events from Electronic Health Record Notes (MADE 1.0).
Jagannatha, A.; Liu, F.; Liu, W.; and Yu, H.
Drug Safety, (1): 99–111. January 2019.
doi
bibtex
abstract
@article{jagannatha_overview_2019, title = {Overview of the {First} {Natural} {Language} {Processing} {Challenge} for {Extracting} {Medication}, {Indication}, and {Adverse} {Drug} {Events} from {Electronic} {Health} {Record} {Notes} ({MADE} 1.0)}, issn = {1179-1942}, doi = {10.1007/s40264-018-0762-z}, abstract = {INTRODUCTION: This work describes the Medication and Adverse Drug Events from Electronic Health Records (MADE 1.0) corpus and provides an overview of the MADE 1.0 2018 challenge for extracting medication, indication, and adverse drug events (ADEs) from electronic health record (EHR) notes. OBJECTIVE: The goal of MADE is to provide a set of common evaluation tasks to assess the state of the art for natural language processing (NLP) systems applied to EHRs supporting drug safety surveillance and pharmacovigilance. We also provide benchmarks on the MADE dataset using the system submissions received in the MADE 2018 challenge. METHODS: The MADE 1.0 challenge has released an expert-annotated cohort of medication and ADE information comprising 1089 fully de-identified longitudinal EHR notes from 21 randomly selected patients with cancer at the University of Massachusetts Memorial Hospital. Using this cohort as a benchmark, the MADE 1.0 challenge designed three shared NLP tasks. The named entity recognition (NER) task identifies medications and their attributes (dosage, route, duration, and frequency), indications, ADEs, and severity. The relation identification (RI) task identifies relations between the named entities: medication-indication, medication-ADE, and attribute relations. The third shared task (NER-RI) evaluates NLP models that perform the NER and RI tasks jointly. In total, 11 teams from four countries participated in at least one of the three shared tasks, and 41 system submissions were received in total. RESULTS: The best systems F1 scores for NER, RI, and NER-RI were 0.82, 0.86, and 0.61, respectively. Ensemble classifiers using the team submissions improved the performance further, with an F1 score of 0.85, 0.87, and 0.66 for the three tasks, respectively. CONCLUSION: MADE results show that recent progress in NLP has led to remarkable improvements in NER and RI tasks for the clinical domain. However, some room for improvement remains, particularly in the NER-RI task.}, language = {eng}, number = {1}, journal = {Drug Safety}, author = {Jagannatha, Abhyuday and Liu, Feifan and Liu, Weisong and Yu, Hong}, month = jan, year = {2019}, pmid = {30649735 PMCID: PMC6860017}, pages = {99--111}, }
INTRODUCTION: This work describes the Medication and Adverse Drug Events from Electronic Health Records (MADE 1.0) corpus and provides an overview of the MADE 1.0 2018 challenge for extracting medication, indication, and adverse drug events (ADEs) from electronic health record (EHR) notes. OBJECTIVE: The goal of MADE is to provide a set of common evaluation tasks to assess the state of the art for natural language processing (NLP) systems applied to EHRs supporting drug safety surveillance and pharmacovigilance. We also provide benchmarks on the MADE dataset using the system submissions received in the MADE 2018 challenge. METHODS: The MADE 1.0 challenge has released an expert-annotated cohort of medication and ADE information comprising 1089 fully de-identified longitudinal EHR notes from 21 randomly selected patients with cancer at the University of Massachusetts Memorial Hospital. Using this cohort as a benchmark, the MADE 1.0 challenge designed three shared NLP tasks. The named entity recognition (NER) task identifies medications and their attributes (dosage, route, duration, and frequency), indications, ADEs, and severity. The relation identification (RI) task identifies relations between the named entities: medication-indication, medication-ADE, and attribute relations. The third shared task (NER-RI) evaluates NLP models that perform the NER and RI tasks jointly. In total, 11 teams from four countries participated in at least one of the three shared tasks, and 41 system submissions were received in total. RESULTS: The best systems F1 scores for NER, RI, and NER-RI were 0.82, 0.86, and 0.61, respectively. Ensemble classifiers using the team submissions improved the performance further, with an F1 score of 0.85, 0.87, and 0.66 for the three tasks, respectively. CONCLUSION: MADE results show that recent progress in NLP has led to remarkable improvements in NER and RI tasks for the clinical domain. However, some room for improvement remains, particularly in the NER-RI task.
Naranjo Question Answering using End-to-End Multi-task Learning Model.
Rawat, B. P; Li, F.; and Yu, H.
25th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD), 2547–2555. 2019.
doi
bibtex
abstract
@article{rawat_naranjo_2019, title = {Naranjo {Question} {Answering} using {End}-to-{End} {Multi}-task {Learning} {Model}}, doi = {10.1145/3292500.3330770}, abstract = {In the clinical domain, it is important to understand whether an adverse drug reaction (ADR) is caused by a particular medication. Clinical judgement studies help judge the causal relation between a medication and its ADRs. In this study, we present the first attempt to automatically infer the causality between a drug and an ADR from electronic health records (EHRs) by answering the Naranjo questionnaire, the validated clinical question answering set used by domain experts for ADR causality assessment. Using physicians’ annotation as the gold standard, our proposed joint model, which uses multi-task learning to predict the answers of a subset of the Naranjo questionnaire, significantly outperforms the baseline pipeline model with a good margin, achieving a macro-weighted f-score between 0.3652 – 0.5271 and micro-weighted f-score between 0.9523 – 0.9918.}, journal = {25th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)}, author = {Rawat, Bhanu P and Li, Fei and Yu, Hong}, year = {2019}, pmid = {31799022 NIHMSID: NIHMS1058295 PMCID:PMC6887102}, pages = {2547--2555}, }
In the clinical domain, it is important to understand whether an adverse drug reaction (ADR) is caused by a particular medication. Clinical judgement studies help judge the causal relation between a medication and its ADRs. In this study, we present the first attempt to automatically infer the causality between a drug and an ADR from electronic health records (EHRs) by answering the Naranjo questionnaire, the validated clinical question answering set used by domain experts for ADR causality assessment. Using physicians’ annotation as the gold standard, our proposed joint model, which uses multi-task learning to predict the answers of a subset of the Naranjo questionnaire, significantly outperforms the baseline pipeline model with a good margin, achieving a macro-weighted f-score between 0.3652 – 0.5271 and micro-weighted f-score between 0.9523 – 0.9918.
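The joint model's multi-task structure, a shared encoder feeding a separate answer head per Naranjo question, can be sketched as below; the encoder, dimensions, and question/answer counts are assumptions for illustration only:

```python
# Shared-encoder multi-task classifier: one output head per question.
import torch
import torch.nn as nn

class MultiTaskNaranjo(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_questions, num_answers):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU())
        self.heads = nn.ModuleList(
            [nn.Linear(hidden_dim, num_answers) for _ in range(num_questions)]
        )

    def forward(self, x):
        h = self.encoder(x)                      # shared EHR representation
        return [head(h) for head in self.heads]  # one logit vector per question

model = MultiTaskNaranjo(input_dim=64, hidden_dim=32, num_questions=4, num_answers=3)
print([o.shape for o in model(torch.randn(2, 64))])
```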
A neural abstractive summarization model guided with topic sentences.
Chen, C.; Hu, B.; Chen, Q.; and Yu, H.
In ICONIP, 2019.
bibtex
@inproceedings{chen_neural_2019, title = {A neural abstractive summarization model guided with topic sentences}, booktitle = {{ICONIP}}, author = {Chen, Chen and Hu, Baotian and Chen, Qingcai and Yu, Hong}, year = {2019}, }
An investigation of single-domain and multidomain medication and adverse drug event relation extraction from electronic health record notes using advanced deep learning models.
Li, F.; and Yu, H.
Journal of the American Medical Informatics Association, 26(7): 646–654. July 2019.
Paper
doi
bibtex
abstract
@article{li_investigation_2019, title = {An investigation of single-domain and multidomain medication and adverse drug event relation extraction from electronic health record notes using advanced deep learning models}, volume = {26}, url = {https://academic.oup.com/jamia/article/26/7/646/5426087}, doi = {10.1093/jamia/ocz018}, abstract = {AbstractObjective. We aim to evaluate the effectiveness of advanced deep learning models (eg, capsule network [CapNet], adversarial training [ADV]) for single-}, language = {en}, number = {7}, urldate = {2019-12-09}, journal = {Journal of the American Medical Informatics Association}, author = {Li, Fei and Yu, Hong}, month = jul, year = {2019}, pages = {646--654}, }
Objective: We aim to evaluate the effectiveness of advanced deep learning models (eg, capsule network [CapNet], adversarial training [ADV]) for single-domain and multidomain relation extraction…
Anticoagulant prescribing for non-valvular atrial fibrillation in the Veterans Health Administration.
Rose, A.; Goldberg, R; McManus, D.; Kapoor, A; Wang, V; Liu, W; and Yu, H
Journal of the American Heart Association. 2019.
doi
bibtex
abstract
@article{rose_anticoagulant_2019, title = {Anticoagulant prescribing for non-valvular atrial fibrillation in the {Veterans} {Health} {Administration}}, doi = {10.1161/JAHA.119.012646}, abstract = {Background Direct acting oral anticoagulants (DOACs) theoretically could contribute to addressing underuse of anticoagulation in non-valvular atrial fibrillation (NVAF). Few studies have examined this prospect, however. The potential of DOACs to address underuse of anticoagulation in NVAF could be magnified within a healthcare system that sharply limits patients' exposure to out-of-pocket copayments, such as the Veterans Health Administration (VA). Methods and Results We used a clinical data set of all patients with NVAF treated within VA from 2007 to 2016 (n=987 373). We examined how the proportion of patients receiving any anticoagulation, and which agent was prescribed, changed over time. When first approved for VA use in 2011, DOACs constituted a tiny proportion of all prescriptions for anticoagulants (2\%); by 2016, this proportion had increased to 45\% of all prescriptions and 67\% of new prescriptions. Patient characteristics associated with receiving a DOAC, rather than warfarin, included white race, better kidney function, fewer comorbid conditions overall, and no history of stroke or bleeding. In 2007, before the introduction of DOACs, 56\% of VA patients with NVAF were receiving anticoagulation; this dipped to 44\% in 2012 just after the introduction of DOACs and had risen back to 51\% by 2016. Conclusions These results do not suggest that the availability of DOACs has led to an increased proportion of patients with NVAF receiving anticoagulation, even in the context of a healthcare system that sharply limits patients' exposure to out-of-pocket copayments.}, journal = {Journal of the American Heart Association}, author = {Rose, AJ and Goldberg, R and McManus, DD and Kapoor, A and Wang, V and Liu, W and Yu, H}, year = {2019}, pmid = {31441364 PMCID:PMC6755851}, }
Background Direct acting oral anticoagulants (DOACs) theoretically could contribute to addressing underuse of anticoagulation in non-valvular atrial fibrillation (NVAF). Few studies have examined this prospect, however. The potential of DOACs to address underuse of anticoagulation in NVAF could be magnified within a healthcare system that sharply limits patients' exposure to out-of-pocket copayments, such as the Veterans Health Administration (VA). Methods and Results We used a clinical data set of all patients with NVAF treated within VA from 2007 to 2016 (n=987 373). We examined how the proportion of patients receiving any anticoagulation, and which agent was prescribed, changed over time. When first approved for VA use in 2011, DOACs constituted a tiny proportion of all prescriptions for anticoagulants (2%); by 2016, this proportion had increased to 45% of all prescriptions and 67% of new prescriptions. Patient characteristics associated with receiving a DOAC, rather than warfarin, included white race, better kidney function, fewer comorbid conditions overall, and no history of stroke or bleeding. In 2007, before the introduction of DOACs, 56% of VA patients with NVAF were receiving anticoagulation; this dipped to 44% in 2012 just after the introduction of DOACs and had risen back to 51% by 2016. Conclusions These results do not suggest that the availability of DOACs has led to an increased proportion of patients with NVAF receiving anticoagulation, even in the context of a healthcare system that sharply limits patients' exposure to out-of-pocket copayments.
Learning Latent Parameters without Human Response Patterns: Item Response Theory with Artificial Crowds.
Lalor, J. P.; Wu, H.; and Yu, H.
In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 4240–4250, Hong Kong, China, November 2019. Association for Computational Linguistics
NIHMSID: NIHMS1059054
Paper
doi
bibtex
abstract
@inproceedings{lalor_learning_2019, address = {Hong Kong, China}, title = {Learning {Latent} {Parameters} without {Human} {Response} {Patterns}: {Item} {Response} {Theory} with {Artificial} {Crowds}}, shorttitle = {Learning {Latent} {Parameters} without {Human} {Response} {Patterns}}, url = {https://www.aclweb.org/anthology/D19-1434}, doi = {10.18653/v1/D19-1434}, abstract = {Incorporating Item Response Theory (IRT) into NLP tasks can provide valuable information about model performance and behavior. Traditionally, IRT models are learned using human response pattern (RP) data, presenting a significant bottleneck for large data sets like those required for training deep neural networks (DNNs). In this work we propose learning IRT models using RPs generated from artificial crowds of DNN models. We demonstrate the effectiveness of learning IRT models using DNN-generated data through quantitative and qualitative analyses for two NLP tasks. Parameters learned from human and machine RPs for natural language inference and sentiment analysis exhibit medium to large positive correlations. We demonstrate a use-case for latent difficulty item parameters, namely training set filtering, and show that using difficulty to sample training data outperforms baseline methods. Finally, we highlight cases where human expectation about item difficulty does not match difficulty as estimated from the machine RPs.}, urldate = {2019-11-11}, booktitle = {Proceedings of the 2019 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing} and the 9th {International} {Joint} {Conference} on {Natural} {Language} {Processing} ({EMNLP}-{IJCNLP})}, publisher = {Association for Computational Linguistics}, author = {Lalor, John P. and Wu, Hao and Yu, Hong}, month = nov, year = {2019}, pmcid = {PMC6892593}, pmid = {31803865}, note = {NIHMSID: NIHMS1059054}, pages = {4240--4250}, }
Incorporating Item Response Theory (IRT) into NLP tasks can provide valuable information about model performance and behavior. Traditionally, IRT models are learned using human response pattern (RP) data, presenting a significant bottleneck for large data sets like those required for training deep neural networks (DNNs). In this work we propose learning IRT models using RPs generated from artificial crowds of DNN models. We demonstrate the effectiveness of learning IRT models using DNN-generated data through quantitative and qualitative analyses for two NLP tasks. Parameters learned from human and machine RPs for natural language inference and sentiment analysis exhibit medium to large positive correlations. We demonstrate a use-case for latent difficulty item parameters, namely training set filtering, and show that using difficulty to sample training data outperforms baseline methods. Finally, we highlight cases where human expectation about item difficulty does not match difficulty as estimated from the machine RPs.
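For reference, the standard two-parameter-logistic item response curve that such models fit, from either human or machine response patterns, is p(correct) = sigmoid(a * (theta - b)); whether the paper uses exactly this parameterization is an assumption here:

```python
# 2PL IRT item response function: ability theta, discrimination a, difficulty b.
import numpy as np

def p_correct(theta, a, b):
    return 1.0 / (1.0 + np.exp(-a * (theta - b)))

print(p_correct(theta=0.5, a=1.2, b=-0.3))  # easy item, above-average ability
```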
Clinical Question Answering from Electronic Health Records.
Singh, B.; Li, F.; and Yu, H.
In The MLHC 2019 research track proceedings, 2019.
Paper
bibtex
@inproceedings{singh_clinical_2019, title = {Clinical {Question} {Answering} from {Electronic} {Health} {Records}. {In} the {MLHC} 2019 research track proceedings}, url = {https://static1.squarespace.com/static/59d5ac1780bd5ef9c396eda6/t/5d472f54d73cd5000124d13c/1564946262055/Rawat.pdf}, booktitle = {The {MLHC} 2019 research track proceedings}, author = {Singh, Bhanu and Li, Fei and Yu, Hong}, year = {2019}, }
Comparing Human and DNN-Ensemble Response Patterns for Item Response Theory Model Fitting.
Lalor, J.; Wu, H.; and Yu, H.
2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL), Workshop on Cognitive Modeling and Computational Linguistics (CMCL). 2019.
Paper
bibtex
@article{lalor_comparing_2019, title = {Comparing {Human} and {DNN}-{Ensemble} {Response} {Patterns} for {Item} {Response} {Theory} {Model} {Fitting}}, url = {http://jplalor.github.io/pdfs/cmcl19_irt.pdf}, journal = {2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)The Workshop on Cognitive Modeling and Computational Linguistics (CMCL)}, author = {Lalor, John and Wu, Hao and Yu, Hong}, year = {2019}, }
QuikLitE, a Framework for Quick Literacy Evaluation in Medicine: Development and Validation.
Zheng, J.; and Yu, H.
Journal of Medical Internet Research, 21(2): e12525. 2019.
Paper
doi
bibtex
abstract
@article{zheng_quiklite_2019, title = {{QuikLitE}, a {Framework} for {Quick} {Literacy} {Evaluation} in {Medicine}: {Development} and {Validation}}, volume = {21}, copyright = {Unless stated otherwise, all articles are open-access distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work (}, shorttitle = {{QuikLitE}, a {Framework} for {Quick} {Literacy} {Evaluation} in {Medicine}}, url = {https://www.jmir.org/2019/2/e12525/}, doi = {10.2196/jmir.12525}, abstract = {Background: A plethora of health literacy instruments was developed over the decades. They usually start with experts curating passages of text or word lists, followed by psychometric validation and revision based on test results obtained from a sample population. This process is costly and it is difficult to customize for new usage scenarios. Objective: This study aimed to develop and evaluate a framework for dynamically creating test instruments that can provide a focused assessment of patients’ health literacy. Methods: A health literacy framework and scoring method were extended from the vocabulary knowledge test to accommodate a wide range of item difficulties and various degrees of uncertainty in the participant’s answer. Web-based tests from Amazon Mechanical Turk users were used to assess reliability and validity. Results: Parallel forms of our tests showed high reliability (correlation=.78; 95\% CI 0.69-0.85). Validity measured as correlation with an electronic health record comprehension instrument was higher (.47-.61 among 3 groups) than 2 existing tools (Short Assessment of Health Literacy-English, .38-.43; Short Test of Functional Health Literacy in Adults, .34-.46). Our framework is able to distinguish higher literacy levels that are often not measured by other instruments. It is also flexible, allowing customizations to the test the designer’s focus on a particular interest in a subject matter or domain. The framework is among the fastest health literacy instrument to administer. Conclusions: We proposed a valid and highly reliable framework to dynamically create health literacy instruments, alleviating the need to repeat a time-consuming process when a new use scenario arises. This framework can be customized to a specific need on demand and can measure skills beyond the basic level. [J Med Internet Res 2019;21(2):e12525]}, language = {en}, number = {2}, urldate = {2019-02-22}, journal = {Journal of Medical Internet Research}, author = {Zheng, Jiaping and Yu, Hong}, year = {2019}, pmid = {30794206 PMCID: 6406229}, pages = {e12525}, }
Background: A plethora of health literacy instruments was developed over the decades. They usually start with experts curating passages of text or word lists, followed by psychometric validation and revision based on test results obtained from a sample population. This process is costly and it is difficult to customize for new usage scenarios. Objective: This study aimed to develop and evaluate a framework for dynamically creating test instruments that can provide a focused assessment of patients' health literacy. Methods: A health literacy framework and scoring method were extended from the vocabulary knowledge test to accommodate a wide range of item difficulties and various degrees of uncertainty in the participant's answer. Web-based tests from Amazon Mechanical Turk users were used to assess reliability and validity. Results: Parallel forms of our tests showed high reliability (correlation=.78; 95% CI 0.69-0.85). Validity, measured as correlation with an electronic health record comprehension instrument, was higher (.47-.61 among 3 groups) than that of 2 existing tools (Short Assessment of Health Literacy-English, .38-.43; Short Test of Functional Health Literacy in Adults, .34-.46). Our framework is able to distinguish higher literacy levels that are often not measured by other instruments. It is also flexible, allowing customization to the test designer's focus on a particular subject matter or domain. The framework is among the fastest health literacy instruments to administer. Conclusions: We proposed a valid and highly reliable framework to dynamically create health literacy instruments, alleviating the need to repeat a time-consuming process when a new use scenario arises. This framework can be customized to a specific need on demand and can measure skills beyond the basic level. [J Med Internet Res 2019;21(2):e12525]
Towards Drug Safety Surveillance and Pharmacovigilance: Current Progress in Detecting Medication and Adverse Drug Events from Electronic Health Records.
Liu, F.; Jagannatha, A.; and Yu, H.
Drug Safety. January 2019.
Paper
doi
bibtex
@article{liu_towards_2019, title = {Towards {Drug} {Safety} {Surveillance} and {Pharmacovigilance}: {Current} {Progress} in {Detecting} {Medication} and {Adverse} {Drug} {Events} from {Electronic} {Health} {Records}}, issn = {1179-1942}, shorttitle = {Towards {Drug} {Safety} {Surveillance} and {Pharmacovigilance}}, url = {https://doi.org/10.1007/s40264-018-0766-8}, doi = {10.1007/s40264-018-0766-8}, language = {en}, urldate = {2019-01-31}, journal = {Drug Safety}, author = {Liu, Feifan and Jagannatha, Abhyuday and Yu, Hong}, month = jan, year = {2019}, pmid = {30649734}, }
Improving Electronic Health Record Note Comprehension With NoteAid: Randomized Trial of Electronic Health Record Note Comprehension Interventions With Crowdsourced Workers.
Lalor, J. P.; Woolf, B.; and Yu, H.
Journal of Medical Internet Research, 21(1): e10793. 2019.
Paper
doi
bibtex
abstract
@article{lalor_improving_2019, title = {Improving {Electronic} {Health} {Record} {Note} {Comprehension} {With} {NoteAid}: {Randomized} {Trial} of {Electronic} {Health} {Record} {Note} {Comprehension} {Interventions} {With} {Crowdsourced} {Workers}}, volume = {21}, copyright = {Unless stated otherwise, all articles are open-access distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work (}, shorttitle = {Improving {Electronic} {Health} {Record} {Note} {Comprehension} {With} {NoteAid}}, url = {https://www.jmir.org/2019/1/e10793/}, doi = {10.2196/jmir.10793}, abstract = {Background: Patient portals are becoming more common, and with them, the ability of patients to access their personal electronic health records (EHRs). EHRs, in particular the free-text EHR notes, often contain medical jargon and terms that are difficult for laypersons to understand. There are many Web-based resources for learning more about particular diseases or conditions, including systems that directly link to lay definitions or educational materials for medical concepts. Objective: Our goal is to determine whether use of one such tool, NoteAid, leads to higher EHR note comprehension ability. We use a new EHR note comprehension assessment tool instead of patient self-reported scores. Methods: In this work, we compare a passive, self-service educational resource (MedlinePlus) with an active resource (NoteAid) where definitions are provided to the user for medical concepts that the system identifies. We use Amazon Mechanical Turk (AMT) to recruit individuals to complete ComprehENotes, a new test of EHR note comprehension. Results: Mean scores for individuals with access to NoteAid are significantly higher than the mean baseline scores, both for raw scores (P=.008) and estimated ability (P=.02). Conclusions: In our experiments, we show that the active intervention leads to significantly higher scores on the comprehension test as compared with a baseline group with no resources provided. In contrast, there is no significant difference between the group that was provided with the passive intervention and the baseline group. Finally, we analyze the demographics of the individuals who participated in our AMT task and show differences between groups that align with the current understanding of health literacy between populations. This is the first work to show improvements in comprehension using tools such as NoteAid as measured by an EHR note comprehension assessment tool as opposed to patient self-reported scores. [J Med Internet Res 2019;21(1):e10793]}, language = {en}, number = {1}, urldate = {2019-01-31}, journal = {Journal of Medical Internet Research}, author = {Lalor, John P. and Woolf, Beverly and Yu, Hong}, year = {2019}, pmid = {30664453 PMCID: 6351990}, pages = {e10793}, }
Background: Patient portals are becoming more common, and with them, the ability of patients to access their personal electronic health records (EHRs). EHRs, in particular the free-text EHR notes, often contain medical jargon and terms that are difficult for laypersons to understand. There are many Web-based resources for learning more about particular diseases or conditions, including systems that directly link to lay definitions or educational materials for medical concepts. Objective: Our goal is to determine whether use of one such tool, NoteAid, leads to higher EHR note comprehension ability. We use a new EHR note comprehension assessment tool instead of patient self-reported scores. Methods: In this work, we compare a passive, self-service educational resource (MedlinePlus) with an active resource (NoteAid) where definitions are provided to the user for medical concepts that the system identifies. We use Amazon Mechanical Turk (AMT) to recruit individuals to complete ComprehENotes, a new test of EHR note comprehension. Results: Mean scores for individuals with access to NoteAid are significantly higher than the mean baseline scores, both for raw scores (P=.008) and estimated ability (P=.02). Conclusions: In our experiments, we show that the active intervention leads to significantly higher scores on the comprehension test as compared with a baseline group with no resources provided. In contrast, there is no significant difference between the group that was provided with the passive intervention and the baseline group. Finally, we analyze the demographics of the individuals who participated in our AMT task and show differences between groups that align with the current understanding of health literacy between populations. This is the first work to show improvements in comprehension using tools such as NoteAid as measured by an EHR note comprehension assessment tool as opposed to patient self-reported scores. [J Med Internet Res 2019;21(1):e10793]
Generating Classical Chinese Poems from Vernacular Chinese.
Yang, Z.; Cai, P.; Feng, Y.; Li, F.; Feng, W.; Chiu, E. S.; and Yu, H.
In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 6156–6165, Hong Kong, China, November 2019. Association for Computational Linguistics
Paper
doi
bibtex
abstract
@inproceedings{yang_generating_2019, address = {Hong Kong, China}, title = {Generating {Classical} {Chinese} {Poems} from {Vernacular} {Chinese}}, url = {https://www.aclweb.org/anthology/D19-1637}, doi = {10.18653/v1/D19-1637}, abstract = {Classical Chinese poetry is a jewel in the treasure house of Chinese culture. Previous poem generation models only allow users to employ keywords to interfere the meaning of generated poems, leaving the dominion of generation to the model. In this paper, we propose a novel task of generating classical Chinese poems from vernacular, which allows users to have more control over the semantic of generated poems. We adapt the approach of unsupervised machine translation (UMT) to our task. We use segmentation-based padding and reinforcement learning to address under-translation and over-translation respectively. According to experiments, our approach significantly improve the perplexity and BLEU compared with typical UMT models. Furthermore, we explored guidelines on how to write the input vernacular to generate better poems. Human evaluation showed our approach can generate high-quality poems which are comparable to amateur poems.}, urldate = {2019-11-11}, booktitle = {Proceedings of the 2019 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing} and the 9th {International} {Joint} {Conference} on {Natural} {Language} {Processing} ({EMNLP}-{IJCNLP})}, publisher = {Association for Computational Linguistics}, author = {Yang, Zhichao and Cai, Pengshan and Feng, Yansong and Li, Fei and Feng, Weijiang and Chiu, Elena Suet-Ying and yu, hong}, month = nov, year = {2019}, pages = {6156--6165}, }
Classical Chinese poetry is a jewel in the treasure house of Chinese culture. Previous poem generation models only allow users to employ keywords to influence the meaning of generated poems, leaving the rest of the generation to the model. In this paper, we propose a novel task of generating classical Chinese poems from vernacular Chinese, which gives users more control over the semantics of generated poems. We adapt the approach of unsupervised machine translation (UMT) to our task. We use segmentation-based padding and reinforcement learning to address under-translation and over-translation, respectively. In experiments, our approach significantly improves perplexity and BLEU compared with typical UMT models. Furthermore, we explored guidelines on how to write the input vernacular to generate better poems. Human evaluation showed our approach can generate high-quality poems comparable to amateur poems.
Method for Meta-Level Continual Learning.
Yu, H.; and Munkhdalai, T.
January 2019.
Paper
bibtex
abstract
@patent{yu_method_2019, title = {Method for {Meta}-{Level} {Continual} {Learning}}, url = {https://patents.google.com/patent/US20190034798A1/en}, abstract = {Classification of an input task data set by meta level continual learning includes analyzing first and second training data sets in a task space to generate first and second meta weights and a slow weight value, and comparing an input task data set to the slow weight to generate a fast weight. The first and second meta weights are parameterized with the fast weight value to update the slow weight value, whereby a value is associated with the input task data set, thereby classifying the input task data set by meta level continual learning.}, nationality = {US}, assignee = {University Of Massachusetts Medical School}, number = {US20190034798A1}, urldate = {2019-04-10}, author = {Yu, Hong and Munkhdalai, Tsendsuren}, month = jan, year = {2019}, keywords = {loss, meta, slow, task, weight}, }
Classification of an input task data set by meta level continual learning includes analyzing first and second training data sets in a task space to generate first and second meta weights and a slow weight value, and comparing an input task data set to the slow weight to generate a fast weight. The first and second meta weights are parameterized with the fast weight value to update the slow weight value, whereby a value is associated with the input task data set, thereby classifying the input task data set by meta level continual learning.
Advancing Clinical Research Through Natural Language Processing on Electronic Health Records: Traditional Machine Learning Meets Deep Learning.
Liu, F.; Weng, C.; and Yu, H.
In Richesson, R. L.; and Andrews, J. E., editors, Clinical Research Informatics, Health Informatics, pages 357–378. Springer International Publishing, Cham, 2019.
Paper
doi
bibtex
abstract
@incollection{liu_advancing_2019, address = {Cham}, series = {Health {Informatics}}, title = {Advancing {Clinical} {Research} {Through} {Natural} {Language} {Processing} on {Electronic} {Health} {Records}: {Traditional} {Machine} {Learning} {Meets} {Deep} {Learning}}, isbn = {978-3-319-98779-8}, shorttitle = {Advancing {Clinical} {Research} {Through} {Natural} {Language} {Processing} on {Electronic} {Health} {Records}}, url = {https://doi.org/10.1007/978-3-319-98779-8_17}, abstract = {Electronic health records (EHR) capture “real-world” disease and care processes and hence offer richer and more generalizable data for comparative effectiveness research than traditional randomized clinical trial studies. With the increasingly broadening adoption of EHR worldwide, there is a growing need to widen the use of EHR data to support clinical research. A big barrier to this goal is that much of the information in EHR is still narrative. This chapter describes the foundation of biomedical language processing and explains how traditional machine learning and the state-of-the-art deep learning techniques can be employed in the context of extracting and transforming narrative information in EHR to support clinical research.}, language = {en}, urldate = {2019-04-09}, booktitle = {Clinical {Research} {Informatics}}, publisher = {Springer International Publishing}, author = {Liu, Feifan and Weng, Chunhua and Yu, Hong}, editor = {Richesson, Rachel L. and Andrews, James E.}, year = {2019}, doi = {10.1007/978-3-319-98779-8_17}, keywords = {Biomedical natural language processing, Clinical research, Deep learning, Electronic health records, Machine learning, Rule-based approach}, pages = {357--378}, }
Electronic health records (EHR) capture “real-world” disease and care processes and hence offer richer and more generalizable data for comparative effectiveness research than traditional randomized clinical trial studies. With the increasingly broadening adoption of EHR worldwide, there is a growing need to widen the use of EHR data to support clinical research. A big barrier to this goal is that much of the information in EHR is still narrative. This chapter describes the foundation of biomedical language processing and explains how traditional machine learning and the state-of-the-art deep learning techniques can be employed in the context of extracting and transforming narrative information in EHR to support clinical research.
Automatic extraction of quantitative data from ClinicalTrials.gov to conduct meta-analyses.
Pradhan, R.; Hoaglin, D. C.; Cornell, M.; Liu, W.; Wang, V.; and Yu, H.
Journal of Clinical Epidemiology, 105: 92–100. January 2019.
doi bibtex abstract
@article{pradhan_automatic_2019, title = {Automatic extraction of quantitative data from {ClinicalTrials}.gov to conduct meta-analyses}, volume = {105}, issn = {1878-5921}, doi = {10.1016/j.jclinepi.2018.08.023}, abstract = {OBJECTIVES: Systematic reviews and meta-analyses are labor-intensive and time-consuming. Automated extraction of quantitative data from primary studies can accelerate this process. ClinicalTrials.gov, launched in 2000, is the world's largest trial repository of results data from clinical trials; it has been used as a source instead of journal articles. We have developed a Web application called EXACT (EXtracting Accurate efficacy and safety information from ClinicalTrials.gov) that allows users without advanced programming skills to automatically extract data from ClinicalTrials.gov in analysis-ready format. We have also used the automatically extracted data to examine the reproducibility of meta-analyses in three published systematic reviews. STUDY DESIGN AND SETTING: We developed a Python-based software application (EXACT) that automatically extracts data required for meta-analysis from the ClinicalTrials.gov database in a spreadsheet format. We confirmed the accuracy of the extracted data and then used those data to repeat meta-analyses in three published systematic reviews. To ensure that we used the same statistical methods and outcomes as the published systematic reviews, we repeated the meta-analyses using data manually extracted from the relevant journal articles. For the outcomes whose results we were able to reproduce using those journal article data, we examined the usability of ClinicalTrials.gov data. RESULTS: EXACT extracted data at ClincalTrials.gov with 100\% accuracy, and it required 60\% less time than the usual practice of manually extracting data from journal articles. We found that 87\% of the data elements extracted using EXACT matched those extracted manually from the journal articles. We were able to reproduce 24 of 28 outcomes using the journal article data. Of these 24 outcomes, we were able to reproduce 83.3\% of the published estimates using data at ClinicalTrials.gov. CONCLUSION: EXACT (http://bio-nlp.org/EXACT) automatically and accurately extracted data elements from ClinicalTrials.gov and thus reduced time in data extraction. The ClinicalTrials.gov data reproduced most meta-analysis results in our study, but this conclusion needs further validation.}, language = {eng}, journal = {Journal of Clinical Epidemiology}, author = {Pradhan, Richeek and Hoaglin, David C. and Cornell, Matthew and Liu, Weisong and Wang, Victoria and Yu, Hong}, month = jan, year = {2019}, pmid = {30257185}, keywords = {Automatic data extraction, ClinicalTrials.gov, Meta-analysis, Reproducibility, Simeprevir, Systematic review, Trametinib, Vortioxetine}, pages = {92--100}, }
OBJECTIVES: Systematic reviews and meta-analyses are labor-intensive and time-consuming. Automated extraction of quantitative data from primary studies can accelerate this process. ClinicalTrials.gov, launched in 2000, is the world's largest trial repository of results data from clinical trials; it has been used as a source instead of journal articles. We have developed a Web application called EXACT (EXtracting Accurate efficacy and safety information from ClinicalTrials.gov) that allows users without advanced programming skills to automatically extract data from ClinicalTrials.gov in analysis-ready format. We have also used the automatically extracted data to examine the reproducibility of meta-analyses in three published systematic reviews. STUDY DESIGN AND SETTING: We developed a Python-based software application (EXACT) that automatically extracts data required for meta-analysis from the ClinicalTrials.gov database in a spreadsheet format. We confirmed the accuracy of the extracted data and then used those data to repeat meta-analyses in three published systematic reviews. To ensure that we used the same statistical methods and outcomes as the published systematic reviews, we repeated the meta-analyses using data manually extracted from the relevant journal articles. For the outcomes whose results we were able to reproduce using those journal article data, we examined the usability of ClinicalTrials.gov data. RESULTS: EXACT extracted data at ClinicalTrials.gov with 100% accuracy, and it required 60% less time than the usual practice of manually extracting data from journal articles. We found that 87% of the data elements extracted using EXACT matched those extracted manually from the journal articles. We were able to reproduce 24 of 28 outcomes using the journal article data. Of these 24 outcomes, we were able to reproduce 83.3% of the published estimates using data at ClinicalTrials.gov. CONCLUSION: EXACT (http://bio-nlp.org/EXACT) automatically and accurately extracted data elements from ClinicalTrials.gov and thus reduced time in data extraction. The ClinicalTrials.gov data reproduced most meta-analysis results in our study, but this conclusion needs further validation.
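As a rough illustration of the kind of automation EXACT provides, the sketch below pulls reported outcome measures for a single trial from the current ClinicalTrials.gov v2 REST API. A caveat: this API postdates the paper (EXACT worked against the site as it existed in 2018), and the field names here reflect my reading of the public v2 schema, so verify them against the live documentation before relying on this.

import requests

def fetch_outcome_measures(nct_id):
    """Return the reported outcome measures for one registered trial."""
    url = f"https://clinicaltrials.gov/api/v2/studies/{nct_id}"
    study = requests.get(url, timeout=30).json()
    module = study.get("resultsSection", {}).get("outcomeMeasuresModule", {})
    return module.get("outcomeMeasures", [])

# "NCT00000000" is a placeholder; substitute a real trial ID with posted results.
for om in fetch_outcome_measures("NCT00000000"):
    print(om.get("type"), "-", om.get("title"))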
2018
(19)
Clinical Relation Extraction Toward Drug Safety Surveillance Using Electronic Health Record Narratives: Classical Learning Versus Deep Learning.
Munkhdalai, T.; Liu, F.; and Yu, H.
JMIR Public Health and Surveillance, 4(2): e29. April 2018.
doi bibtex abstract
@article{munkhdalai_clinical_2018, title = {Clinical {Relation} {Extraction} {Toward} {Drug} {Safety} {Surveillance} {Using} {Electronic} {Health} {Record} {Narratives}: {Classical} {Learning} {Versus} {Deep} {Learning}}, volume = {4}, issn = {2369-2960}, shorttitle = {Clinical {Relation} {Extraction} {Toward} {Drug} {Safety} {Surveillance} {Using} {Electronic} {Health} {Record} {Narratives}}, doi = {10.2196/publichealth.9361}, abstract = {BACKGROUND: Medication and adverse drug event (ADE) information extracted from electronic health record (EHR) notes can be a rich resource for drug safety surveillance. Existing observational studies have mainly relied on structured EHR data to obtain ADE information; however, ADEs are often buried in the EHR narratives and not recorded in structured data. OBJECTIVE: To unlock ADE-related information from EHR narratives, there is a need to extract relevant entities and identify relations among them. In this study, we focus on relation identification. This study aimed to evaluate natural language processing and machine learning approaches using the expert-annotated medical entities and relations in the context of drug safety surveillance, and investigate how different learning approaches perform under different configurations. METHODS: We have manually annotated 791 EHR notes with 9 named entities (eg, medication, indication, severity, and ADEs) and 7 different types of relations (eg, medication-dosage, medication-ADE, and severity-ADE). Then, we explored 3 supervised machine learning systems for relation identification: (1) a support vector machines (SVM) system, (2) an end-to-end deep neural network system, and (3) a supervised descriptive rule induction baseline system. For the neural network system, we exploited the state-of-the-art recurrent neural network (RNN) and attention models. We report the performance by macro-averaged precision, recall, and F1-score across the relation types. RESULTS: Our results show that the SVM model achieved the best average F1-score of 89.1\% on test data, outperforming the long short-term memory (LSTM) model with attention (F1-score of 65.72\%) as well as the rule induction baseline system (F1-score of 7.47\%) by a large margin. The bidirectional LSTM model with attention achieved the best performance among different RNN models. With the inclusion of additional features in the LSTM model, its performance can be boosted to an average F1-score of 77.35\%. CONCLUSIONS: It shows that classical learning models (SVM) remains advantageous over deep learning models (RNN variants) for clinical relation identification, especially for long-distance intersentential relations. However, RNNs demonstrate a great potential of significant improvement if more training data become available. Our work is an important step toward mining EHRs to improve the efficacy of drug safety surveillance. Most importantly, the annotated data used in this study will be made publicly available, which will further promote drug safety research in the community.}, language = {eng}, number = {2}, journal = {JMIR public health and surveillance}, author = {Munkhdalai, Tsendsuren and Liu, Feifan and Yu, Hong}, month = apr, year = {2018}, pmid = {29695376 PMCID: PMC5943628}, keywords = {drug-related side effects and adverse reactions, electronic health records, medical informatics applications, natural language processing, neural networks}, pages = {e29}, }
BACKGROUND: Medication and adverse drug event (ADE) information extracted from electronic health record (EHR) notes can be a rich resource for drug safety surveillance. Existing observational studies have mainly relied on structured EHR data to obtain ADE information; however, ADEs are often buried in the EHR narratives and not recorded in structured data. OBJECTIVE: To unlock ADE-related information from EHR narratives, there is a need to extract relevant entities and identify relations among them. In this study, we focus on relation identification. This study aimed to evaluate natural language processing and machine learning approaches using the expert-annotated medical entities and relations in the context of drug safety surveillance, and investigate how different learning approaches perform under different configurations. METHODS: We have manually annotated 791 EHR notes with 9 named entities (eg, medication, indication, severity, and ADEs) and 7 different types of relations (eg, medication-dosage, medication-ADE, and severity-ADE). Then, we explored 3 supervised machine learning systems for relation identification: (1) a support vector machines (SVM) system, (2) an end-to-end deep neural network system, and (3) a supervised descriptive rule induction baseline system. For the neural network system, we exploited the state-of-the-art recurrent neural network (RNN) and attention models. We report the performance by macro-averaged precision, recall, and F1-score across the relation types. RESULTS: Our results show that the SVM model achieved the best average F1-score of 89.1% on test data, outperforming the long short-term memory (LSTM) model with attention (F1-score of 65.72%) as well as the rule induction baseline system (F1-score of 7.47%) by a large margin. The bidirectional LSTM model with attention achieved the best performance among different RNN models. With the inclusion of additional features in the LSTM model, its performance can be boosted to an average F1-score of 77.35%. CONCLUSIONS: These results show that classical learning models (SVMs) remain advantageous over deep learning models (RNN variants) for clinical relation identification, especially for long-distance intersentential relations. However, RNNs demonstrate great potential for significant improvement if more training data become available. Our work is an important step toward mining EHRs to improve the efficacy of drug safety surveillance. Most importantly, the annotated data used in this study will be made publicly available, which will further promote drug safety research in the community.
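A minimal sketch of the classical-learning side of this comparison: a linear SVM over TF-IDF features of the text surrounding a candidate entity pair. The toy examples and feature design are illustrative only, not the paper's annotated corpus or exact feature set.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# Toy training data: context around a candidate (medication, attribute) pair,
# labeled with the relation type. Real systems use far richer features.
contexts = [
    "started metoprolol 50 mg daily for hypertension",
    "developed a severe rash after starting allopurinol",
    "lisinopril dose increased to 20 mg",
    "reports nausea since beginning metformin",
]
labels = ["medication-dosage", "medication-ADE",
          "medication-dosage", "medication-ADE"]

clf = make_pipeline(TfidfVectorizer(ngram_range=(1, 2)), LinearSVC())
clf.fit(contexts, labels)
print(clf.predict(["dizziness after taking amlodipine"]))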
A Natural Language Processing System That Links Medical Terms in Electronic Health Record Notes to Lay Definitions: System Development Using Physician Reviews.
Chen, J.; Druhl, E.; Polepalli Ramesh, B.; Houston, T. K.; Brandt, C. A.; Zulman, D. M.; Vimalananda, V. G.; Malkani, S.; and Yu, H.
Journal of Medical Internet Research, 20(1): e26. January 2018.
doi bibtex abstract
@article{chen_natural_2018, title = {A {Natural} {Language} {Processing} {System} {That} {Links} {Medical} {Terms} in {Electronic} {Health} {Record} {Notes} to {Lay} {Definitions}: {System} {Development} {Using} {Physician} {Reviews}}, volume = {20}, issn = {1438-8871}, shorttitle = {A {Natural} {Language} {Processing} {System} {That} {Links} {Medical} {Terms} in {Electronic} {Health} {Record} {Notes} to {Lay} {Definitions}}, doi = {10.2196/jmir.8669}, abstract = {BACKGROUND: Many health care systems now allow patients to access their electronic health record (EHR) notes online through patient portals. Medical jargon in EHR notes can confuse patients, which may interfere with potential benefits of patient access to EHR notes. OBJECTIVE: The aim of this study was to develop and evaluate the usability and content quality of NoteAid, a Web-based natural language processing system that links medical terms in EHR notes to lay definitions, that is, definitions easily understood by lay people. METHODS: NoteAid incorporates two core components: CoDeMed, a lexical resource of lay definitions for medical terms, and MedLink, a computational unit that links medical terms to lay definitions. We developed innovative computational methods, including an adapted distant supervision algorithm to prioritize medical terms important for EHR comprehension to facilitate the effort of building CoDeMed. Ten physician domain experts evaluated the user interface and content quality of NoteAid. The evaluation protocol included a cognitive walkthrough session and a postsession questionnaire. Physician feedback sessions were audio-recorded. We used standard content analysis methods to analyze qualitative data from these sessions. RESULTS: Physician feedback was mixed. Positive feedback on NoteAid included (1) Easy to use, (2) Good visual display, (3) Satisfactory system speed, and (4) Adequate lay definitions. Opportunities for improvement arising from evaluation sessions and feedback included (1) improving the display of definitions for partially matched terms, (2) including more medical terms in CoDeMed, (3) improving the handling of terms whose definitions vary depending on different contexts, and (4) standardizing the scope of definitions for medicines. On the basis of these results, we have improved NoteAid's user interface and a number of definitions, and added 4502 more definitions in CoDeMed. CONCLUSIONS: Physician evaluation yielded useful feedback for content validation and refinement of this innovative tool that has the potential to improve patient EHR comprehension and experience using patient portals. Future ongoing work will develop algorithms to handle ambiguous medical terms and test and evaluate NoteAid with patients.}, language = {eng}, number = {1}, journal = {Journal of Medical Internet Research}, author = {Chen, Jinying and Druhl, Emily and Polepalli Ramesh, Balaji and Houston, Thomas K. and Brandt, Cynthia A. and Zulman, Donna M. and Vimalananda, Varsha G. and Malkani, Samir and Yu, Hong}, month = jan, year = {2018}, pmid = {29358159 PMCID: PMC5799720}, keywords = {computer software, consumer health informatics, electronic health records, natural language processing, usability testing}, pages = {e26}, }
BACKGROUND: Many health care systems now allow patients to access their electronic health record (EHR) notes online through patient portals. Medical jargon in EHR notes can confuse patients, which may interfere with potential benefits of patient access to EHR notes. OBJECTIVE: The aim of this study was to develop and evaluate the usability and content quality of NoteAid, a Web-based natural language processing system that links medical terms in EHR notes to lay definitions, that is, definitions easily understood by lay people. METHODS: NoteAid incorporates two core components: CoDeMed, a lexical resource of lay definitions for medical terms, and MedLink, a computational unit that links medical terms to lay definitions. We developed innovative computational methods, including an adapted distant supervision algorithm to prioritize medical terms important for EHR comprehension to facilitate the effort of building CoDeMed. Ten physician domain experts evaluated the user interface and content quality of NoteAid. The evaluation protocol included a cognitive walkthrough session and a postsession questionnaire. Physician feedback sessions were audio-recorded. We used standard content analysis methods to analyze qualitative data from these sessions. RESULTS: Physician feedback was mixed. Positive feedback on NoteAid included (1) Easy to use, (2) Good visual display, (3) Satisfactory system speed, and (4) Adequate lay definitions. Opportunities for improvement arising from evaluation sessions and feedback included (1) improving the display of definitions for partially matched terms, (2) including more medical terms in CoDeMed, (3) improving the handling of terms whose definitions vary depending on different contexts, and (4) standardizing the scope of definitions for medicines. On the basis of these results, we have improved NoteAid's user interface and a number of definitions, and added 4502 more definitions in CoDeMed. CONCLUSIONS: Physician evaluation yielded useful feedback for content validation and refinement of this innovative tool that has the potential to improve patient EHR comprehension and experience using patient portals. Future ongoing work will develop algorithms to handle ambiguous medical terms and test and evaluate NoteAid with patients.
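The linking step (MedLink) can be illustrated with a greedy longest-match lookup against a lay-definition dictionary. The three-entry dictionary below is a hypothetical stand-in for CoDeMed, and the matching is deliberately simplistic (no morphology, abbreviation handling, or disambiguation):

# Hypothetical stand-in for the CoDeMed lexical resource.
LAY_DEFS = {
    "hypertension": "high blood pressure",
    "myocardial infarction": "heart attack",
    "edema": "swelling caused by fluid buildup",
}

def link_terms(note, max_len=4):
    """Greedy longest-match linking of note text to lay definitions."""
    tokens = note.lower().split()
    links, i = [], 0
    while i < len(tokens):
        for n in range(min(max_len, len(tokens) - i), 0, -1):  # longest first
            span = " ".join(tokens[i:i + n])
            if span in LAY_DEFS:
                links.append((span, LAY_DEFS[span]))
                i += n
                break
        else:
            i += 1
    return links

print(link_terms("History of hypertension and prior myocardial infarction"))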
A hybrid Neural Network Model for Joint Prediction of Presence and Period Assertions of Medical Events in Clinical Notes.
Li, R.; Jagannatha, A. N.; and Yu, H.
AMIA Annual Symposium Proceedings, 2017: 1149–1158. April 2018.
Paper
bibtex
abstract
@article{rumeng_hybrid_2018, title = {A hybrid {Neural} {Network} {Model} for {Joint} {Prediction} of {Presence} and {Period} {Assertions} of {Medical} {Events} in {Clinical} {Notes}}, volume = {2017}, issn = {1942-597X}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5977733/}, abstract = {In this paper, we propose a novel neural network architecture for clinical text mining. We formulate this hybrid neural network model (HNN), composed of recurrent neural network and deep residual network, to jointly predict the presence and period assertion values associated with medical events in clinical texts. We evaluate the effectiveness of our model on a corpus of expert-annotated longitudinal Electronic Health Records (EHR) notes from Cancer patients. Our experiments show that HNN improves the joint assertion classification accuracy as compared to conventional baselines.}, urldate = {2018-10-01}, journal = {AMIA Annual Symposium Proceedings}, author = {Rumeng, Li and Abhyuday N, Jagannatha and Hong, Yu}, month = apr, year = {2018}, pmid = {29854183}, pmcid = {PMC5977733}, pages = {1149--1158}, }
In this paper, we propose a novel neural network architecture for clinical text mining. We formulate this hybrid neural network model (HNN), composed of recurrent neural network and deep residual network, to jointly predict the presence and period assertion values associated with medical events in clinical texts. We evaluate the effectiveness of our model on a corpus of expert-annotated longitudinal Electronic Health Records (EHR) notes from Cancer patients. Our experiments show that HNN improves the joint assertion classification accuracy as compared to conventional baselines.
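A minimal sketch of the joint-prediction idea, assuming pre-indexed token sequences and one medical event per sequence: a shared BiLSTM encoder feeding two classification heads, one for presence assertions and one for period assertions. The paper's deep residual component is omitted here for brevity.

import torch
import torch.nn as nn

class JointAssertionModel(nn.Module):
    def __init__(self, vocab=5000, emb=100, hid=128, n_presence=3, n_period=3):
        super().__init__()
        self.embed = nn.Embedding(vocab, emb)
        self.encoder = nn.LSTM(emb, hid, batch_first=True, bidirectional=True)
        self.presence_head = nn.Linear(2 * hid, n_presence)  # e.g. present/absent/possible
        self.period_head = nn.Linear(2 * hid, n_period)      # e.g. current/history/hypothetical

    def forward(self, token_ids):
        states, _ = self.encoder(self.embed(token_ids))
        pooled = states.mean(dim=1)        # average over the token sequence
        return self.presence_head(pooled), self.period_head(pooled)

model = JointAssertionModel()
presence_logits, period_logits = model(torch.randint(0, 5000, (2, 40)))
loss = (nn.functional.cross_entropy(presence_logits, torch.tensor([0, 1]))
        + nn.functional.cross_entropy(period_logits, torch.tensor([2, 0])))
loss.backward()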
Assessing the Readability of Medical Documents: A Ranking Approach.
Zheng, J.; and Yu, H.
JMIR Medical Informatics, 6(1): e17. March 2018.
doi bibtex abstract
@article{zheng_assessing_2018, title = {Assessing the {Readability} of {Medical} {Documents}: {A} {Ranking} {Approach}}, volume = {6}, issn = {2291-9694}, shorttitle = {Assessing the {Readability} of {Medical} {Documents}}, doi = {10.2196/medinform.8611}, abstract = {BACKGROUND: The use of electronic health record (EHR) systems with patient engagement capabilities, including viewing, downloading, and transmitting health information, has recently grown tremendously. However, using these resources to engage patients in managing their own health remains challenging due to the complex and technical nature of the EHR narratives. OBJECTIVE: Our objective was to develop a machine learning-based system to assess readability levels of complex documents such as EHR notes. METHODS: We collected difficulty ratings of EHR notes and Wikipedia articles using crowdsourcing from 90 readers. We built a supervised model to assess readability based on relative orders of text difficulty using both surface text features and word embeddings. We evaluated system performance using the Kendall coefficient of concordance against human ratings. RESULTS: Our system achieved significantly higher concordance (.734) with human annotators than did a baseline using the Flesch-Kincaid Grade Level, a widely adopted readability formula (.531). The improvement was also consistent across different disease topics. This method's concordance with an individual human user's ratings was also higher than the concordance between different human annotators (.658). CONCLUSIONS: We explored methods to automatically assess the readability levels of clinical narratives. Our ranking-based system using simple textual features and easy-to-learn word embeddings outperformed a widely used readability formula. Our ranking-based method can predict relative difficulties of medical documents. It is not constrained to a predefined set of readability levels, a common design in many machine learning-based systems. Furthermore, the feature set does not rely on complex processing of the documents. One potential application of our readability ranking is personalization, allowing patients to better accommodate their own background knowledge.}, language = {eng}, number = {1}, journal = {JMIR medical informatics}, author = {Zheng, Jiaping and Yu, Hong}, month = mar, year = {2018}, pmid = {29572199}, pmcid = {PMC5889493}, keywords = {comprehension, electronic health records, machine learning, readability}, pages = {e17}, }
BACKGROUND: The use of electronic health record (EHR) systems with patient engagement capabilities, including viewing, downloading, and transmitting health information, has recently grown tremendously. However, using these resources to engage patients in managing their own health remains challenging due to the complex and technical nature of the EHR narratives. OBJECTIVE: Our objective was to develop a machine learning-based system to assess readability levels of complex documents such as EHR notes. METHODS: We collected difficulty ratings of EHR notes and Wikipedia articles using crowdsourcing from 90 readers. We built a supervised model to assess readability based on relative orders of text difficulty using both surface text features and word embeddings. We evaluated system performance using the Kendall coefficient of concordance against human ratings. RESULTS: Our system achieved significantly higher concordance (.734) with human annotators than did a baseline using the Flesch-Kincaid Grade Level, a widely adopted readability formula (.531). The improvement was also consistent across different disease topics. This method's concordance with an individual human user's ratings was also higher than the concordance between different human annotators (.658). CONCLUSIONS: We explored methods to automatically assess the readability levels of clinical narratives. Our ranking-based system using simple textual features and easy-to-learn word embeddings outperformed a widely used readability formula. Our ranking-based method can predict relative difficulties of medical documents. It is not constrained to a predefined set of readability levels, a common design in many machine learning-based systems. Furthermore, the feature set does not rely on complex processing of the documents. One potential application of our readability ranking is personalization, allowing patients to better accommodate their own background knowledge.
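The ranking formulation can be illustrated RankSVM-style: learn a linear classifier on feature differences of text pairs ordered by difficulty. The two surface features and toy pairs below are stand-ins; the paper also used word embeddings.

import numpy as np
from sklearn.svm import LinearSVC

def features(text):
    words = text.split()
    # two illustrative surface features: length and average word length
    return np.array([len(words), np.mean([len(w) for w in words])])

# Each pair is (easier_text, harder_text), as judged by readers.
pairs = [
    ("The heart pumps blood.", "Cardiac output was preserved despite ischemia."),
    ("Take one pill a day.", "Administer 5 mg orally once daily as tolerated."),
]
X, y = [], []
for easy, hard in pairs:
    d = features(easy) - features(hard)
    X.extend([d, -d])
    y.extend([-1, 1])   # +1 means "the first text is harder"

ranker = LinearSVC().fit(np.array(X), y)
print(ranker.predict([features("Hemodynamics remained labile.")
                      - features("His blood pressure was unstable.")]))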
Understanding Deep Learning Performance through an Examination of Test Set Difficulty: A Psychometric Case Study.
Lalor, J.; Wu, H.; Munkhdalai, T.; and Yu, H.
In EMNLP, 2018.
Paper
doi
bibtex
abstract
@inproceedings{lalor_understanding_2018, title = {Understanding {Deep} {Learning} {Performance} through an {Examination} of {Test} {Set} {Difficulty}: {A} {Psychometric} {Case} {Study}}, url = {https://arxiv.org/abs/1702.04811v3}, doi = {DOI: 10.18653/v1/D18-1500}, abstract = {Interpreting the performance of deep learning models beyond test set accuracy is challenging. Characteristics of individual data points are often not considered during evaluation, and each data point is treated equally. We examine the impact of a test set question's difficulty to determine if there is a relationship between difficulty and performance. We model difficulty using well-studied psychometric methods on human response patterns. Experiments on Natural Language Inference (NLI) and Sentiment Analysis (SA) show that the likelihood of answering a question correctly is impacted by the question's difficulty. As DNNs are trained with more data, easy examples are learned more quickly than hard examples.}, booktitle = {{EMNLP}}, author = {Lalor, John and Wu, Hao and Munkhdalai, Tsendsuren and Yu, Hong}, year = {2018}, }
Interpreting the performance of deep learning models beyond test set accuracy is challenging. Characteristics of individual data points are often not considered during evaluation, and each data point is treated equally. We examine the impact of a test set question's difficulty to determine if there is a relationship between difficulty and performance. We model difficulty using well-studied psychometric methods on human response patterns. Experiments on Natural Language Inference (NLI) and Sentiment Analysis (SA) show that the likelihood of answering a question correctly is impacted by the question's difficulty. As DNNs are trained with more data, easy examples are learned more quickly than hard examples.
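The core analysis can be reproduced in miniature: estimate per-item difficulty (for example with an IRT fit as sketched earlier) and correlate it with whether the model answered each item correctly. The arrays below are hypothetical stand-ins for real difficulty estimates and model predictions.

import numpy as np
from scipy.stats import pointbiserialr

difficulty = np.array([-1.2, -0.4, 0.1, 0.9, 1.7])  # per-item IRT difficulty
correct = np.array([1, 1, 1, 0, 0])                  # did the model get it right?
r, p = pointbiserialr(correct, difficulty)
print(f"point-biserial r = {r:.2f} (p = {p:.3f})")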
Soft Label Memorization-Generalization for Natural Language Inference.
Lalor, J.; Wu, H.; and Yu, H.
In 2018.
Paper
bibtex
abstract
@inproceedings{lalor_soft_2018, title = {Soft {Label} {Memorization}-{Generalization} for {Natural} {Language} {Inference}.}, url = {https://arxiv.org/abs/1702.08563v3}, abstract = {Often when multiple labels are obtained for a training example it is assumed that there is an element of noise that must be accounted for. It has been shown that this disagreement can be considered signal instead of noise. In this work we investigate using soft labels for training data to improve generalization in machine learning models. However, using soft labels for training Deep Neural Networks (DNNs) is not practical due to the costs involved in obtaining multiple labels for large data sets. We propose soft label memorization-generalization (SLMG), a fine-tuning approach to using soft labels for training DNNs. We assume that differences in labels provided by human annotators represent ambiguity about the true label instead of noise. Experiments with SLMG demonstrate improved generalization performance on the Natural Language Inference (NLI) task. Our experiments show that by injecting a small percentage of soft label training data (0.03\% of training set size) we can improve generalization performance over several baselines.}, author = {Lalor, John and Wu, Hao and Yu, Hong}, year = {2018}, }
Often when multiple labels are obtained for a training example it is assumed that there is an element of noise that must be accounted for. It has been shown that this disagreement can be considered signal instead of noise. In this work we investigate using soft labels for training data to improve generalization in machine learning models. However, using soft labels for training Deep Neural Networks (DNNs) is not practical due to the costs involved in obtaining multiple labels for large data sets. We propose soft label memorization-generalization (SLMG), a fine-tuning approach to using soft labels for training DNNs. We assume that differences in labels provided by human annotators represent ambiguity about the true label instead of noise. Experiments with SLMG demonstrate improved generalization performance on the Natural Language Inference (NLI) task. Our experiments show that by injecting a small percentage of soft label training data (0.03% of training set size) we can improve generalization performance over several baselines.
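A minimal sketch of the soft-label training signal, assuming a label distribution is available per example (for instance, the empirical distribution of annotator votes): train against that distribution with a KL-divergence loss rather than a one-hot cross-entropy.

import torch
import torch.nn.functional as F

logits = torch.randn(4, 3, requires_grad=True)    # model outputs for 4 examples
soft_targets = torch.tensor([[0.60, 0.30, 0.10],  # per-example annotator label
                             [0.10, 0.80, 0.10],  # distributions (hypothetical)
                             [0.30, 0.30, 0.40],
                             [0.90, 0.05, 0.05]])
loss = F.kl_div(F.log_softmax(logits, dim=1), soft_targets, reduction="batchmean")
loss.backward()
print(float(loss))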
Sentence Simplification with Memory-Augmented Neural Networks.
Vu, T.; Hu, B.; Munkhdalai, T.; and Yu, H.
In North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2018.
doi bibtex abstract
@inproceedings{vu_sentence_2018, title = {Sentence {Simplification} with {Memory}-{Augmented} {Neural} {Networks}}, doi = {DOI:10.18653/v1/N18-2013}, abstract = {Sentence simplification aims to simplify the content and structure of complex sentences, and thus make them easier to interpret for human readers, and easier to process for downstream NLP applications. Recent advances in neural machine translation have paved the way for novel approaches to the task. In this paper, we adapt an architecture with augmented memory capacities called Neural Semantic Encoders (Munkhdalai and Yu, 2017) for sentence simplification. Our experiments demonstrate the effectiveness of our approach on different simplification datasets, both in terms of automatic evaluation measures and human judgments.}, booktitle = {North {American} {Chapter} of the {Association} for {Computational} {Linguistics}: {Human} {Language} {Technologies}}, author = {Vu, Tu and Hu, Baotian and Munkhdalai, Tsendsuren and Yu, Hong}, year = {2018}, }
Sentence simplification aims to simplify the content and structure of complex sentences, and thus make them easier to interpret for human readers, and easier to process for downstream NLP applications. Recent advances in neural machine translation have paved the way for novel approaches to the task. In this paper, we adapt an architecture with augmented memory capacities called Neural Semantic Encoders (Munkhdalai and Yu, 2017) for sentence simplification. Our experiments demonstrate the effectiveness of our approach on different simplification datasets, both in terms of automatic evaluation measures and human judgments.
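One read-compose-write step of a Neural Semantic Encoder-style memory update can be sketched as follows. This is a simplified paraphrase of Munkhdalai and Yu (2017), not the paper's implementation: read attends over memory slots with the current token state, compose merges the token state with what was read, and write blends the composed vector back into the attended slots.

import torch
import torch.nn as nn

class NSEStep(nn.Module):
    def __init__(self, dim=64):
        super().__init__()
        self.compose = nn.Linear(2 * dim, dim)

    def forward(self, x_t, memory):
        attn = torch.softmax(memory @ x_t, dim=0)   # read: attend over slots
        read = attn @ memory
        c_t = torch.tanh(self.compose(torch.cat([x_t, read])))  # compose
        # write: blend the composed vector back into memory, weighted by attention
        memory = (1 - attn).unsqueeze(1) * memory + attn.unsqueeze(1) * c_t
        return c_t, memory

step = NSEStep()
memory = torch.randn(10, 64)   # 10 memory slots, e.g. one per source token
out, memory = step(torch.randn(64), memory)
print(out.shape, memory.shape)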
Recent Trends In Oral Anticoagulant Use and Post-Discharge Complications Among Atrial Fibrillation Patients With Acute Myocardial Infarction.
Kundu, A.; O'Day, K.; Lessard, D. M.; Gore, J. M.; Lubitz, S. A.; Yu, H.; Akhter, M. W.; Fisher, D. Z.; Hayward, R. M., Jr.; Henninger, N.; Saczynski, J. S.; Walkey, A. J.; Kapoor, A.; Yarzebski, J.; Goldberg, R. J.; and McManus, D. D.
Journal of Atrial Fibrillation, 2018.
doi bibtex abstract
@inproceedings{amartya_kundu_recent_2018, title = {Recent {Trends} {In} {Oral} {Anticoagulant} {Use} and {Post}-{Discharge} {Complications} {Among} {Atrial} {Fibrillation} {Patients} {With} {Acute} {Myocardial} {Infarction}}, doi = {DOI: 10.4022/jafib.1749}, abstract = {BACKGROUND: Atrial fibrillation (AF) is a common complication of acute myocardial infarction (AMI).The CHA2DS2VAScand CHADS2risk scoresare used to identifypatients with AF at risk for strokeand to guide oral anticoagulants (OAC) use, including patients with AMI. However, the epidemiology of AF, further stratifiedaccording to patients' risk of stroke, has not been wellcharacterized among those hospitalized for AMI. METHODS: We examined trends in the frequency of AF, rates of discharge OAC use, and post-discharge outcomes among 6,627 residents of the Worcester, Massachusetts area who survived hospitalization for AMI at 11 medical centers between 1997 and 2011. RESULTS: A total of 1,050AMI patients had AF (16\%) andthe majority (91\%)had a CHA2DS2VAScscore {\textgreater}2.AF rates were highest among patients in the highest stroke risk group.In comparison to patients without AF, patients with AMI and AF in the highest stroke risk category had higher rates of post-discharge complications, including higher 30-day re-hospitalization [27 \% vs. 17 \%], 30-day post-discharge death [10 \% vs. 5\%], and 1-year post-discharge death [46 \% vs. 18 \%] (p {\textless} 0.001 for all). Notably, fewerthan half of guideline-eligible AF patientsreceived an OACprescription at discharge. Usage rates for other evidence-based therapiessuch as statins and beta-blockers,lagged in comparison to AMI patients free from AF. CONCLUSIONS: Our findings highlight the need to enhance efforts towards stroke prevention among AMI survivors with AF.}, publisher = {Journal of Atrial Fibrillation}, author = {{Amartya Kundu} and {Kevin O ’Day} and {Darleen M. Lessard} and {Joel M. Gore1} and {Steven A. Lubitz} and {Hong Yu} and {Mohammed W. Akhter} and {Daniel Z. Fisher} and {Robert M. Hayward Jr.} and {Nils Henninger} and {Jane S. Saczynski} and {Allan J. Walkey} and {Alok Kapoor} and {Jorge Yarzebski} and {Robert J. Goldberg} and {David D. McManus}}, year = {2018}, pmid = {29988239 PMCID: PMC6006973}, }
BACKGROUND: Atrial fibrillation (AF) is a common complication of acute myocardial infarction (AMI). The CHA2DS2-VASc and CHADS2 risk scores are used to identify patients with AF at risk for stroke and to guide oral anticoagulant (OAC) use, including in patients with AMI. However, the epidemiology of AF, further stratified according to patients' risk of stroke, has not been well characterized among those hospitalized for AMI. METHODS: We examined trends in the frequency of AF, rates of discharge OAC use, and post-discharge outcomes among 6,627 residents of the Worcester, Massachusetts area who survived hospitalization for AMI at 11 medical centers between 1997 and 2011. RESULTS: A total of 1,050 AMI patients had AF (16%) and the majority (91%) had a CHA2DS2-VASc score >2. AF rates were highest among patients in the highest stroke risk group. In comparison to patients without AF, patients with AMI and AF in the highest stroke risk category had higher rates of post-discharge complications, including higher 30-day re-hospitalization [27% vs. 17%], 30-day post-discharge death [10% vs. 5%], and 1-year post-discharge death [46% vs. 18%] (p < 0.001 for all). Notably, fewer than half of guideline-eligible AF patients received an OAC prescription at discharge. Usage rates for other evidence-based therapies, such as statins and beta-blockers, lagged in comparison to AMI patients free from AF. CONCLUSIONS: Our findings highlight the need to enhance efforts towards stroke prevention among AMI survivors with AF.
ComprehENotes, an Instrument to Assess Patient Reading Comprehension of Electronic Health Record Notes: Development and Validation.
Lalor, J.; Wu, H.; Chen, L.; Mazor, K.; and Yu, H.
Journal of Medical Internet Research. April 2018.
doi bibtex abstract
@article{lalor_comprehenotes:_2018, title = {{ComprehENotes}: {An} {Instrument} to {Assess} {Patient} {EHR} {Note} {Reading} {Comprehension} of {Electronic} {Health} {Record} {Notes}: {Development} and {Validation}}, doi = {DOI: 10.2196/jmir.9380}, abstract = {BACKGROUND: Patient portals are widely adopted in the United States and allow millions of patients access to their electronic health records (EHRs), including their EHR clinical notes. A patient's ability to understand the information in the EHR is dependent on their overall health literacy. Although many tests of health literacy exist, none specifically focuses on EHR note comprehension. OBJECTIVE: The aim of this paper was to develop an instrument to assess patients' EHR note comprehension. METHODS: We identified 6 common diseases or conditions (heart failure, diabetes, cancer, hypertension, chronic obstructive pulmonary disease, and liver failure) and selected 5 representative EHR notes for each disease or condition. One note that did not contain natural language text was removed. Questions were generated from these notes using Sentence Verification Technique and were analyzed using item response theory (IRT) to identify a set of questions that represent a good test of ability for EHR note comprehension. RESULTS: Using Sentence Verification Technique, 154 questions were generated from the 29 EHR notes initially obtained. Of these, 83 were manually selected for inclusion in the Amazon Mechanical Turk crowdsourcing tasks and 55 were ultimately retained following IRT analysis. A follow-up validation with a second Amazon Mechanical Turk task and IRT analysis confirmed that the 55 questions test a latent ability dimension for EHR note comprehension. A short test of 14 items was created along with the 55-item test. CONCLUSIONS: We developed ComprehENotes, an instrument for assessing EHR note comprehension from existing EHR notes, gathered responses using crowdsourcing, and used IRT to analyze those responses, thus resulting in a set of questions to measure EHR note comprehension. Crowdsourced responses from Amazon Mechanical Turk can be used to estimate item parameters and select a subset of items for inclusion in the test set using IRT. The final set of questions is the first test of EHR note comprehension.}, journal = {The Journal of Medical Internet Research}, author = {Lalor, J and Wu, H and Chen, L and Mazor, K and Yu, H}, month = apr, year = {2018}, pmid = {29695372 PMCID: PMC5943623}, }
BACKGROUND: Patient portals are widely adopted in the United States and allow millions of patients access to their electronic health records (EHRs), including their EHR clinical notes. A patient's ability to understand the information in the EHR is dependent on their overall health literacy. Although many tests of health literacy exist, none specifically focuses on EHR note comprehension. OBJECTIVE: The aim of this paper was to develop an instrument to assess patients' EHR note comprehension. METHODS: We identified 6 common diseases or conditions (heart failure, diabetes, cancer, hypertension, chronic obstructive pulmonary disease, and liver failure) and selected 5 representative EHR notes for each disease or condition. One note that did not contain natural language text was removed. Questions were generated from these notes using Sentence Verification Technique and were analyzed using item response theory (IRT) to identify a set of questions that represent a good test of ability for EHR note comprehension. RESULTS: Using Sentence Verification Technique, 154 questions were generated from the 29 EHR notes initially obtained. Of these, 83 were manually selected for inclusion in the Amazon Mechanical Turk crowdsourcing tasks and 55 were ultimately retained following IRT analysis. A follow-up validation with a second Amazon Mechanical Turk task and IRT analysis confirmed that the 55 questions test a latent ability dimension for EHR note comprehension. A short test of 14 items was created along with the 55-item test. CONCLUSIONS: We developed ComprehENotes, an instrument for assessing EHR note comprehension from existing EHR notes, gathered responses using crowdsourcing, and used IRT to analyze those responses, thus resulting in a set of questions to measure EHR note comprehension. Crowdsourced responses from Amazon Mechanical Turk can be used to estimate item parameters and select a subset of items for inclusion in the test set using IRT. The final set of questions is the first test of EHR note comprehension.
Detecting Hypoglycemia Incidence from Patients’ Secure Messages.
Chen, J; and Yu, H
In 2018.
bibtex
@inproceedings{chen_detecting_2018, title = {Detecting {Hypoglycemia} {Incidence} from {Patients}’ {Secure} {Messages}}, author = {Chen, J and Yu, H}, year = {2018}, }
Extraction of Information Related to Adverse Drug Events from Electronic Health Record Notes: Design of an End-to-End Model Based on Deep Learning.
Li, F.; Liu, W.; and Yu, H.
JMIR medical informatics, 6(4): e12159. November 2018.
doi bibtex abstract
@article{li_extraction_2018, title = {Extraction of {Information} {Related} to {Adverse} {Drug} {Events} from {Electronic} {Health} {Record} {Notes}: {Design} of an {End}-to-{End} {Model} {Based} on {Deep} {Learning}}, volume = {6}, issn = {2291-9694}, shorttitle = {Extraction of {Information} {Related} to {Adverse} {Drug} {Events} from {Electronic} {Health} {Record} {Notes}}, doi = {10.2196/12159}, abstract = {BACKGROUND: Pharmacovigilance and drug-safety surveillance are crucial for monitoring adverse drug events (ADEs), but the main ADE-reporting systems such as Food and Drug Administration Adverse Event Reporting System face challenges such as underreporting. Therefore, as complementary surveillance, data on ADEs are extracted from electronic health record (EHR) notes via natural language processing (NLP). As NLP develops, many up-to-date machine-learning techniques are introduced in this field, such as deep learning and multi-task learning (MTL). However, only a few studies have focused on employing such techniques to extract ADEs. OBJECTIVE: We aimed to design a deep learning model for extracting ADEs and related information such as medications and indications. Since extraction of ADE-related information includes two steps-named entity recognition and relation extraction-our second objective was to improve the deep learning model using multi-task learning between the two steps. METHODS: We employed the dataset from the Medication, Indication and Adverse Drug Events (MADE) 1.0 challenge to train and test our models. This dataset consists of 1089 EHR notes of cancer patients and includes 9 entity types such as Medication, Indication, and ADE and 7 types of relations between these entities. To extract information from the dataset, we proposed a deep-learning model that uses a bidirectional long short-term memory (BiLSTM) conditional random field network to recognize entities and a BiLSTM-Attention network to extract relations. To further improve the deep-learning model, we employed three typical MTL methods, namely, hard parameter sharing, parameter regularization, and task relation learning, to build three MTL models, called HardMTL, RegMTL, and LearnMTL, respectively. RESULTS: Since extraction of ADE-related information is a two-step task, the result of the second step (ie, relation extraction) was used to compare all models. We used microaveraged precision, recall, and F1 as evaluation metrics. Our deep learning model achieved state-of-the-art results (F1=65.9\%), which is significantly higher than that (F1=61.7\%) of the best system in the MADE1.0 challenge. HardMTL further improved the F1 by 0.8\%, boosting the F1 to 66.7\%, whereas RegMTL and LearnMTL failed to boost the performance. CONCLUSIONS: Deep learning models can significantly improve the performance of ADE-related information extraction. MTL may be effective for named entity recognition and relation extraction, but it depends on the methods, data, and other factors. Our results can facilitate research on ADE detection, NLP, and machine learning.}, language = {eng}, number = {4}, journal = {JMIR medical informatics}, author = {Li, Fei and Liu, Weisong and Yu, Hong}, month = nov, year = {2018}, pmid = {30478023 PMCID: PMC6288593}, keywords = {adverse drug event, deep learning, multi-task learning, named entity recognition, natural language processing, relation extraction}, pages = {e12159}, }
BACKGROUND: Pharmacovigilance and drug-safety surveillance are crucial for monitoring adverse drug events (ADEs), but the main ADE-reporting systems such as Food and Drug Administration Adverse Event Reporting System face challenges such as underreporting. Therefore, as complementary surveillance, data on ADEs are extracted from electronic health record (EHR) notes via natural language processing (NLP). As NLP develops, many up-to-date machine-learning techniques are introduced in this field, such as deep learning and multi-task learning (MTL). However, only a few studies have focused on employing such techniques to extract ADEs. OBJECTIVE: We aimed to design a deep learning model for extracting ADEs and related information such as medications and indications. Since extraction of ADE-related information includes two steps-named entity recognition and relation extraction-our second objective was to improve the deep learning model using multi-task learning between the two steps. METHODS: We employed the dataset from the Medication, Indication and Adverse Drug Events (MADE) 1.0 challenge to train and test our models. This dataset consists of 1089 EHR notes of cancer patients and includes 9 entity types such as Medication, Indication, and ADE and 7 types of relations between these entities. To extract information from the dataset, we proposed a deep-learning model that uses a bidirectional long short-term memory (BiLSTM) conditional random field network to recognize entities and a BiLSTM-Attention network to extract relations. To further improve the deep-learning model, we employed three typical MTL methods, namely, hard parameter sharing, parameter regularization, and task relation learning, to build three MTL models, called HardMTL, RegMTL, and LearnMTL, respectively. RESULTS: Since extraction of ADE-related information is a two-step task, the result of the second step (ie, relation extraction) was used to compare all models. We used microaveraged precision, recall, and F1 as evaluation metrics. Our deep learning model achieved state-of-the-art results (F1=65.9%), which is significantly higher than that (F1=61.7%) of the best system in the MADE1.0 challenge. HardMTL further improved the F1 by 0.8%, boosting the F1 to 66.7%, whereas RegMTL and LearnMTL failed to boost the performance. CONCLUSIONS: Deep learning models can significantly improve the performance of ADE-related information extraction. MTL may be effective for named entity recognition and relation extraction, but it depends on the methods, data, and other factors. Our results can facilitate research on ADE detection, NLP, and machine learning.
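As a rough, illustrative sketch (not the authors' released code), the named-entity-recognition half of such a pipeline can be written in PyTorch with the third-party pytorch-crf package; the entity labels and dimensions below are placeholder assumptions:

import torch
import torch.nn as nn
from torchcrf import CRF  # third-party package: pip install pytorch-crf

class BiLSTMCRFTagger(nn.Module):
    """Tags EHR tokens with entity labels such as Medication/Indication/ADE."""
    def __init__(self, vocab_size, num_tags, emb_dim=100, hidden_dim=128):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.lstm = nn.LSTM(emb_dim, hidden_dim // 2, bidirectional=True,
                            batch_first=True)
        self.emit = nn.Linear(hidden_dim, num_tags)  # per-token tag scores
        self.crf = CRF(num_tags, batch_first=True)   # learned tag transitions

    def loss(self, tokens, tags, mask):
        emissions = self.emit(self.lstm(self.embed(tokens))[0])
        return -self.crf(emissions, tags, mask=mask)  # negative log-likelihood

    def predict(self, tokens, mask):
        emissions = self.emit(self.lstm(self.embed(tokens))[0])
        return self.crf.decode(emissions, mask=mask)  # best Viterbi tag paths

The paper pairs a tagger of this kind with a BiLSTM-Attention relation classifier and, in the multi-task variants, shares parameters between the two steps.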
Reference Standard Development to Train Natural Language Processing Algorithms to Detect Problematic Buprenorphine-Naloxone Therapy.
Peters, C. B.; Cunningham, F.; Gordon, A.; Yu, H.; Salone, C.; Zacher, J.; Carico, R.; Leng, J.; Durley, N.; Liu, W.; Lu, C.; Druhl, E.; Liu, F.; and Sauer, B. C.
In VA Pharmacy Informatics Conference 2018, 2018.
Paper
bibtex
@inproceedings{celena_b_peters_reference_2018, title = {Reference {Standard} {Development} to {Train} {Natural} {Language} {Processing} {Algorithms} to {Detect} {Problematic} {Buprenorphine}-{Naloxone} {Therapy}}, url = {https://vapharmacytraining.remote-learner.net/mod/resource/view.php?id=13218}, booktitle = {{VA} {Pharmacy} {Informatics} {Conference} 2018}, author = {{Celena B Peters} and {Fran Cunningham} and {Adam Gordon} and {Hong Yu} and {Cedric Salone} and {Jessica Zacher} and {Ronald Carico} and {Jianwei Leng} and {Nikolh Durley} and {Weisong Liu} and {Chao-Chin Lu} and {Emily Druhl} and {Feifan Liu} and {Brian C Sauer}}, year = {2018}, }
Inadequate diversity of information resources searched in US-affiliated systematic reviews and meta-analyses: 2005-2016.
Pradhan, R.; Garnick, K.; Barkondaj, B.; Jordan, H. S.; Ash, A.; and Yu, H.
Journal of Clinical Epidemiology, 102: 50–62. October 2018.
doi bibtex abstract
@article{pradhan_inadequate_2018, title = {Inadequate diversity of information resources searched in {US}-affiliated systematic reviews and meta-analyses: 2005-2016}, volume = {102}, issn = {1878-5921}, shorttitle = {Inadequate diversity of information resources searched in {US}-affiliated systematic reviews and meta-analyses}, doi = {10.1016/j.jclinepi.2018.05.024}, abstract = {OBJECTIVE: Systematic reviews and meta-analyses (SRMAs) rely upon comprehensive searches into diverse resources that catalog primary studies. However, since what constitutes a comprehensive search is unclear, we examined trends in databases searched from 2005-2016, surrounding the publication of search guidelines in 2013, and associations between resources searched and evidence of publication bias in SRMAs involving human subjects. STUDY DESIGN: To ensure comparability of included SRMAs over the 12 years in the face of a near 100-fold increase of international SRMAs (mainly genetic studies from China) during this period, we focused on USA-affiliated SRMAs, manually reviewing 100 randomly selected SRMAs from those published in each year. After excluding articles (mainly for inadequate detail or out-of-scope methods), we identified factors associated with the databases searched, used network analysis to see which resources were simultaneously searched, and used logistic regression to link information sources searched with a lower chance of finding publication bias. RESULTS: Among 817 SRMA articles studied, the common resources used were Medline (95\%), EMBASE (44\%), and Cochrane (41\%). Methods journal SRMAs were most likely to use registries and grey literature resources. We found substantial co-searching of resources with only published materials, and not complemented by searches of registries and the grey literature. The 2013 guideline did not substantially increase searching of registries and grey literature resources to retrieve primary studies for the SRMAs. When used to augment Medline, Scopus (in all SRMAs) and ClinicalTrials.gov (in SRMAs with safety outcomes) were negatively associated with publication bias. CONCLUSIONS: Even SRMAs that search multiple sources tend to search similar resources. Our study supports searching Scopus and CTG in addition to Medline to reduce the chance of publication bias.}, language = {eng}, journal = {Journal of Clinical Epidemiology}, author = {Pradhan, Richeek and Garnick, Kyle and Barkondaj, Bikramjit and Jordan, Harmon S. and Ash, Arlene and Yu, Hong}, month = oct, year = {2018}, pmid = {29879464}, pmcid = {PMC6250602}, keywords = {Evidence synthesis, Grey literature, Literature databases, Meta-analysis, Publication bias, Systematic review, Trial registries}, pages = {50--62}, }
OBJECTIVE: Systematic reviews and meta-analyses (SRMAs) rely upon comprehensive searches into diverse resources that catalog primary studies. However, since what constitutes a comprehensive search is unclear, we examined trends in databases searched from 2005-2016, surrounding the publication of search guidelines in 2013, and associations between resources searched and evidence of publication bias in SRMAs involving human subjects. STUDY DESIGN: To ensure comparability of included SRMAs over the 12 years in the face of a near 100-fold increase of international SRMAs (mainly genetic studies from China) during this period, we focused on USA-affiliated SRMAs, manually reviewing 100 randomly selected SRMAs from those published in each year. After excluding articles (mainly for inadequate detail or out-of-scope methods), we identified factors associated with the databases searched, used network analysis to see which resources were simultaneously searched, and used logistic regression to link information sources searched with a lower chance of finding publication bias. RESULTS: Among 817 SRMA articles studied, the common resources used were Medline (95%), EMBASE (44%), and Cochrane (41%). Methods journal SRMAs were most likely to use registries and grey literature resources. We found substantial co-searching of resources with only published materials, and not complemented by searches of registries and the grey literature. The 2013 guideline did not substantially increase searching of registries and grey literature resources to retrieve primary studies for the SRMAs. When used to augment Medline, Scopus (in all SRMAs) and ClinicalTrials.gov (in SRMAs with safety outcomes) were negatively associated with publication bias. CONCLUSIONS: Even SRMAs that search multiple sources tend to search similar resources. Our study supports searching Scopus and CTG in addition to Medline to reduce the chance of publication bias.
ComprehENotes, an Instrument to Assess Patient Reading Comprehension of Electronic Health Record Notes: Development and Validation.
Lalor, J. P.; Wu, H.; Chen, L.; Mazor, K. M.; and Yu, H.
Journal of Medical Internet Research, 20(4): e139. April 2018.
doi bibtex abstract
@article{lalor_comprehenotes_2018, title = {{ComprehENotes}, an {Instrument} to {Assess} {Patient} {Reading} {Comprehension} of {Electronic} {Health} {Record} {Notes}: {Development} and {Validation}}, volume = {20}, issn = {1438-8871}, shorttitle = {{ComprehENotes}, an {Instrument} to {Assess} {Patient} {Reading} {Comprehension} of {Electronic} {Health} {Record} {Notes}}, doi = {10.2196/jmir.9380}, abstract = {BACKGROUND: Patient portals are widely adopted in the United States and allow millions of patients access to their electronic health records (EHRs), including their EHR clinical notes. A patient's ability to understand the information in the EHR is dependent on their overall health literacy. Although many tests of health literacy exist, none specifically focuses on EHR note comprehension. OBJECTIVE: The aim of this paper was to develop an instrument to assess patients' EHR note comprehension. METHODS: We identified 6 common diseases or conditions (heart failure, diabetes, cancer, hypertension, chronic obstructive pulmonary disease, and liver failure) and selected 5 representative EHR notes for each disease or condition. One note that did not contain natural language text was removed. Questions were generated from these notes using Sentence Verification Technique and were analyzed using item response theory (IRT) to identify a set of questions that represent a good test of ability for EHR note comprehension. RESULTS: Using Sentence Verification Technique, 154 questions were generated from the 29 EHR notes initially obtained. Of these, 83 were manually selected for inclusion in the Amazon Mechanical Turk crowdsourcing tasks and 55 were ultimately retained following IRT analysis. A follow-up validation with a second Amazon Mechanical Turk task and IRT analysis confirmed that the 55 questions test a latent ability dimension for EHR note comprehension. A short test of 14 items was created along with the 55-item test. CONCLUSIONS: We developed ComprehENotes, an instrument for assessing EHR note comprehension from existing EHR notes, gathered responses using crowdsourcing, and used IRT to analyze those responses, thus resulting in a set of questions to measure EHR note comprehension. Crowdsourced responses from Amazon Mechanical Turk can be used to estimate item parameters and select a subset of items for inclusion in the test set using IRT. The final set of questions is the first test of EHR note comprehension.}, language = {eng}, number = {4}, journal = {Journal of Medical Internet Research}, author = {Lalor, John P. and Wu, Hao and Chen, Li and Mazor, Kathleen M. and Yu, Hong}, month = apr, year = {2018}, pmid = {29695372}, pmcid = {PMC5943623}, keywords = {crowdsourcing, electronic health records, health literacy, psychometrics}, pages = {e139}, }
BACKGROUND: Patient portals are widely adopted in the United States and allow millions of patients access to their electronic health records (EHRs), including their EHR clinical notes. A patient's ability to understand the information in the EHR is dependent on their overall health literacy. Although many tests of health literacy exist, none specifically focuses on EHR note comprehension. OBJECTIVE: The aim of this paper was to develop an instrument to assess patients' EHR note comprehension. METHODS: We identified 6 common diseases or conditions (heart failure, diabetes, cancer, hypertension, chronic obstructive pulmonary disease, and liver failure) and selected 5 representative EHR notes for each disease or condition. One note that did not contain natural language text was removed. Questions were generated from these notes using Sentence Verification Technique and were analyzed using item response theory (IRT) to identify a set of questions that represent a good test of ability for EHR note comprehension. RESULTS: Using Sentence Verification Technique, 154 questions were generated from the 29 EHR notes initially obtained. Of these, 83 were manually selected for inclusion in the Amazon Mechanical Turk crowdsourcing tasks and 55 were ultimately retained following IRT analysis. A follow-up validation with a second Amazon Mechanical Turk task and IRT analysis confirmed that the 55 questions test a latent ability dimension for EHR note comprehension. A short test of 14 items was created along with the 55-item test. CONCLUSIONS: We developed ComprehENotes, an instrument for assessing EHR note comprehension from existing EHR notes, gathered responses using crowdsourcing, and used IRT to analyze those responses, thus resulting in a set of questions to measure EHR note comprehension. Crowdsourced responses from Amazon Mechanical Turk can be used to estimate item parameters and select a subset of items for inclusion in the test set using IRT. The final set of questions is the first test of EHR note comprehension.
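To make the IRT machinery concrete, here is a minimal two-parameter-logistic (2PL) sketch in Python; the item parameters and responses below are invented for illustration, and the paper's actual analysis fit IRT models to large crowdsourced response sets:

import numpy as np
from scipy.optimize import minimize_scalar

def p_correct(theta, a, b):
    """2PL: probability that a reader of ability theta answers an item with
    discrimination a and difficulty b correctly."""
    return 1.0 / (1.0 + np.exp(-a * (theta - b)))

def estimate_ability(responses, a, b):
    """Maximum-likelihood ability estimate from 0/1 responses to items with
    known (previously fitted) parameters."""
    def neg_log_lik(theta):
        p = p_correct(theta, a, b)
        return -np.sum(responses * np.log(p) + (1 - responses) * np.log(1 - p))
    return minimize_scalar(neg_log_lik, bounds=(-4, 4), method="bounded").x

a = np.array([1.2, 0.8, 1.5, 1.0, 2.0])   # hypothetical discriminations
b = np.array([-1.0, 0.0, 0.5, 1.0, 2.0])  # hypothetical difficulties
print(estimate_ability(np.array([1, 1, 1, 0, 0]), a, b))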
Recent Trends in Oral Anticoagulant Use and Post-Discharge Complications Among Atrial Fibrillation Patients with Acute Myocardial Infarction.
Kundu, A.; Day, K. O.; Lessard, D. M.; Gore, J. M.; Lubitz, S. A.; Yu, H.; Akhter, M. W.; Fisher, D. Z.; Hayward, R. M.; Henninger, N.; Saczynski, J. S.; Walkey, A. J.; Kapoor, A.; Yarzebski, J.; Goldberg, R. J.; and McManus, D. D.
Journal of Atrial Fibrillation, 10(5): 1749. February 2018.
doi bibtex abstract
@article{kundu_recent_2018, title = {Recent {Trends} in {Oral} {Anticoagulant} {Use} and {Post}-{Discharge} {Complications} {Among} {Atrial} {Fibrillation} {Patients} with {Acute} {Myocardial} {Infarction}}, volume = {10}, issn = {1941-6911}, doi = {10.4022/jafib.1749}, abstract = {Background: Atrial fibrillation (AF) is a common complication of acute myocardial infarction (AMI).The CHA2DS2VAScand CHADS2risk scoresare used to identifypatients with AF at risk for strokeand to guide oral anticoagulants (OAC) use, including patients with AMI. However, the epidemiology of AF, further stratifiedaccording to patients' risk of stroke, has not been wellcharacterized among those hospitalized for AMI. Methods: We examined trends in the frequency of AF, rates of discharge OAC use, and post-discharge outcomes among 6,627 residents of the Worcester, Massachusetts area who survived hospitalization for AMI at 11 medical centers between 1997 and 2011. Results: A total of 1,050AMI patients had AF (16\%) andthe majority (91\%)had a CHA2DS2VAScscore {\textgreater}2.AF rates were highest among patients in the highest stroke risk group.In comparison to patients without AF, patients with AMI and AF in the highest stroke risk category had higher rates of post-discharge complications, including higher 30-day re-hospitalization [27 \% vs. 17 \%], 30-day post-discharge death [10 \% vs. 5\%], and 1-year post-discharge death [46 \% vs. 18 \%] (p {\textless} 0.001 for all). Notably, fewerthan half of guideline-eligible AF patientsreceived an OACprescription at discharge. Usage rates for other evidence-based therapiessuch as statins and beta-blockers,lagged in comparison to AMI patients free from AF. Conclusions: Our findings highlight the need to enhance efforts towards stroke prevention among AMI survivors with AF.}, language = {eng}, number = {5}, journal = {Journal of Atrial Fibrillation}, author = {Kundu, Amartya and Day, Kevin O. and Lessard, Darleen M. and Gore, Joel M. and Lubitz, Steven A. and Yu, Hong and Akhter, Mohammed W. and Fisher, Daniel Z. and Hayward, Robert M. and Henninger, Nils and Saczynski, Jane S. and Walkey, Allan J. and Kapoor, Alok and Yarzebski, Jorge and Goldberg, Robert J. and McManus, David D.}, month = feb, year = {2018}, pmid = {29988239}, pmcid = {PMC6006973}, keywords = {Anticoagulation, Atrial Fibrillation, Epidemiology, Outcomes}, pages = {1749}, }
Background: Atrial fibrillation (AF) is a common complication of acute myocardial infarction (AMI). The CHA2DS2-VASc and CHADS2 risk scores are used to identify patients with AF at risk for stroke and to guide oral anticoagulants (OAC) use, including patients with AMI. However, the epidemiology of AF, further stratified according to patients' risk of stroke, has not been well characterized among those hospitalized for AMI. Methods: We examined trends in the frequency of AF, rates of discharge OAC use, and post-discharge outcomes among 6,627 residents of the Worcester, Massachusetts area who survived hospitalization for AMI at 11 medical centers between 1997 and 2011. Results: A total of 1,050 AMI patients had AF (16%) and the majority (91%) had a CHA2DS2-VASc score >2. AF rates were highest among patients in the highest stroke risk group. In comparison to patients without AF, patients with AMI and AF in the highest stroke risk category had higher rates of post-discharge complications, including higher 30-day re-hospitalization [27% vs. 17%], 30-day post-discharge death [10% vs. 5%], and 1-year post-discharge death [46% vs. 18%] (p < 0.001 for all). Notably, fewer than half of guideline-eligible AF patients received an OAC prescription at discharge. Usage rates for other evidence-based therapies, such as statins and beta-blockers, lagged in comparison to AMI patients free from AF. Conclusions: Our findings highlight the need to enhance efforts towards stroke prevention among AMI survivors with AF.
Accuracy of International Classification of Disease Clinical Modification Codes for Detecting Bleeding Events in Electronic Health Records and When to Use Them.
Wang, V; McManus, D; Ash, A; Hoaglin, D; and Yu, H
In 2018.
bibtex
@inproceedings{wang_accuracy_2018, title = {Accuracy of {International} {Classification} of {Disease} {Clinical} {Modification} {Codes} for {Detecting} {Bleeding} {Events} in {Electronic} {Health} {Records} and {When} to {Use} {Them}}, author = {Wang, V and McManus, D and Ash, A and Hoaglin, D and Yu, H}, year = {2018}, }
Frontiers of Clinical Information Extraction: Current Progress in Medication and Adverse Drug Event Detection from Electronic Health Records.
Jagannatha, A.; Liu, F.; Liu, W.; and Yu, H.
In 9th Annual Pharmacy Informatics Conference, 2018.
Paper
bibtex
@inproceedings{abhyuday_jagannatha_frontiers_2018, title = {Frontiers of {Clinical} {Information} {Extraction}: {Current} {Progress} in {Medication} and {Adverse} {Drug} {Event} {Detection} from {Electronic} {Health} {Records}}, url = {https://informaticssummit2018.zerista.com/event/member/470512}, booktitle = {9th {Annual} {Pharmacy} {Informatics} {Conference}}, author = {{Abhyuday Jagannatha} and {Feifan Liu} and {Weisong Liu} and {Hong Yu}}, year = {2018}, }
Panel – Deep Learning for Healthcare: Hype or the Real Thing?
Sun, J.; Westover, B.; Yu, H.; Sontag, D.; and Ghassemi, M.
In AMIA 2018 Informatics Summit, 2018.
bibtex
@inproceedings{j._sun_panel_2018, title = {Panel – {Deep} {Learning} for {Healthcare} - {Hype} or the {Real} {Thing}?}, booktitle = {{AMIA} 2018 {Informatics} {Summit}}, author = {{J. Sun} and {B. Westover} and {H. Yu} and {D. Sontag} and {M. Ghassemi}}, year = {2018}, }
Panel – Frontiers of Clinical Information Extraction: Current Progress in Medication and Adverse Drug Event Detection from Electronic Health Records.
Liu, F.; Jagannatha, A.; and Yu, H.
In AMIA 2018 Informatics Summit, 2018.
bibtex
@inproceedings{feifan_liu_panel_2018, title = {Panel - {Frontiers} of {Clinical} {Information} {Extraction}: {Current} {Progress} in {Medication} and {Adverse} {Drug} {Event} {Detection} from {Electronic} {Health} {Records}}, booktitle = {{AMIA} 2018 {Informatics} {Summit}}, author = {{Feifan Liu} and {Abhyuday Jagannatha} and {Hong Yu}}, year = {2018}, }
2017
(13)
Meta Networks.
Munkhdalai, T.; and Yu, H.
In ICML, volume 70, pages 2554–2563, Sydney, Australia, August 2017.
bibtex abstract
@inproceedings{munkhdalai_meta_2017, address = {Sydney, Australia}, title = {Meta {Networks}}, volume = {70}, abstract = {Neural networks have been successfully applied in applications with a large amount of labeled data. However, the task of rapid generalization on new concepts with small training data while preserving performances on previously learned ones still presents a significant challenge to neural network models. In this work, we introduce a novel meta learning method, Meta Networks (MetaNet), that learns a meta-level knowledge across tasks and shifts its inductive biases via fast parameterization for rapid generalization. When evaluated on Omniglot and Mini-ImageNet benchmarks, our MetaNet models achieve a near human-level performance and outperform the baseline approaches by up to 6\% accuracy. We demonstrate several appealing properties of MetaNet relating to generalization and continual learning.}, booktitle = {{ICML}}, author = {Munkhdalai, Tsendsuren and Yu, Hong}, month = aug, year = {2017}, pmid = {31106300; PMCID: PMC6519722}, pages = {2554--2563}, }
Neural networks have been successfully applied in applications with a large amount of labeled data. However, the task of rapid generalization on new concepts with small training data while preserving performances on previously learned ones still presents a significant challenge to neural network models. In this work, we introduce a novel meta learning method, Meta Networks (MetaNet), that learns a meta-level knowledge across tasks and shifts its inductive biases via fast parameterization for rapid generalization. When evaluated on Omniglot and Mini-ImageNet benchmarks, our MetaNet models achieve a near human-level performance and outperform the baseline approaches by up to 6% accuracy. We demonstrate several appealing properties of MetaNet relating to generalization and continual learning.
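The core trick can be loosely sketched in PyTorch as below: a meta learner maps loss gradients from a few support examples into per-weight "fast weights" that temporarily augment the slow weights. This is a simplified reading of the paper's mechanism, not its exact architecture:

import torch
import torch.nn as nn
import torch.nn.functional as F

class FastWeightLayer(nn.Module):
    """A layer whose slow weights are augmented by meta-generated fast weights."""
    def __init__(self, d_in, d_out):
        super().__init__()
        self.slow = nn.Linear(d_in, d_out)
        # Meta learner: maps each slow-weight gradient to a fast weight.
        self.meta = nn.Sequential(nn.Linear(1, 20), nn.ReLU(), nn.Linear(20, 1))
        self.fast = None

    def meta_update(self, support_x, support_y):
        """Derive fast weights from the loss gradient on a few support examples."""
        loss = F.cross_entropy(self.slow(support_x), support_y)
        grad = torch.autograd.grad(loss, self.slow.weight, create_graph=True)[0]
        self.fast = self.meta(grad.reshape(-1, 1)).reshape_as(self.slow.weight)

    def forward(self, x):
        w = self.slow.weight + (self.fast if self.fast is not None else 0.0)
        return F.linear(x, w, self.slow.bias)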
Neural Semantic Encoders.
Munkhdalai, T; and Yu, H.
In European Chapter of the Association for Computational Linguistics 2017 (EACL), volume 1, pages 397–407, April 2017.
Paper
bibtex
abstract
@inproceedings{munkhdalai_neural_2017, title = {Neural {Semantic} {Encoders}}, volume = {1}, url = {https://arxiv.org/pdf/1607.04315v2.pdf}, abstract = {We present a memory augmented neural network for natural language understanding: Neural Semantic Encoders. NSE is equipped with a novel memory update rule and has a variable sized encoding memory that evolves over time and maintains the understanding of input sequences through read\vphantom{\{}\}, compose and write operations. NSE can also access multiple and shared memories. In this paper, we demonstrated the effectiveness and the flexibility of NSE on five different natural language tasks: natural language inference, question answering, sentence classification, document sentiment analysis and machine translation where NSE achieved state-of-the-art performance when evaluated on publically available benchmarks. For example, our shared-memory model showed an encouraging result on neural machine translation, improving an attention-based baseline by approximately 1.0 BLEU.}, booktitle = {European {Chapter} of the {Association} for {Computational} {Linguistics} 2017 ({EACL})}, author = {Munkhdalai, T and Yu, Hong}, month = apr, year = {2017}, pmid = {29081578 PMCID: PMC5657452}, pages = {397--407}, }
We present a memory augmented neural network for natural language understanding: Neural Semantic Encoders. NSE is equipped with a novel memory update rule and has a variable sized encoding memory that evolves over time and maintains the understanding of input sequences through read, compose and write operations. NSE can also access multiple and shared memories. In this paper, we demonstrated the effectiveness and the flexibility of NSE on five different natural language tasks: natural language inference, question answering, sentence classification, document sentiment analysis and machine translation where NSE achieved state-of-the-art performance when evaluated on publically available benchmarks. For example, our shared-memory model showed an encouraging result on neural machine translation, improving an attention-based baseline by approximately 1.0 BLEU.
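A simplified single-step version of that read/compose/write loop might look as follows in PyTorch; the paper uses LSTMs for each operation, and plain linear/attention stand-ins are used here:

import torch
import torch.nn as nn
import torch.nn.functional as F

class NSEStep(nn.Module):
    """One read/compose/write step over an evolving encoding memory."""
    def __init__(self, dim):
        super().__init__()
        self.compose = nn.Linear(2 * dim, dim)

    def forward(self, x_t, memory):
        # x_t: (batch, dim) current token encoding; memory: (batch, slots, dim).
        # Read: attend over memory slots with the current encoding as the key.
        z = F.softmax(torch.einsum("bd,bsd->bs", x_t, memory), dim=-1)
        m_t = torch.einsum("bs,bsd->bd", z, memory)
        # Compose: combine the input with the retrieved memory content.
        c_t = torch.tanh(self.compose(torch.cat([x_t, m_t], dim=-1)))
        # Write: blend the composed vector back into the attended slots.
        z = z.unsqueeze(-1)
        memory = memory * (1 - z) + z * c_t.unsqueeze(1)
        return c_t, memory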
Detecting Opioid-Related Aberrant Behavior using Natural Language Processing.
Lingeman, J. M.; Wang, P.; Becker, W.; and Yu, H.
AMIA ... Annual Symposium proceedings. AMIA Symposium, 2017: 1179–1185. 2017.
bibtex abstract
@article{lingeman_detecting_2017, title = {Detecting {Opioid}-{Related} {Aberrant} {Behavior} using {Natural} {Language} {Processing}}, volume = {2017}, issn = {1942-597X}, abstract = {The United States is in the midst of a prescription opioid epidemic, with the number of yearly opioid-related overdose deaths increasing almost fourfold since 20001. To more effectively prevent unintentional opioid overdoses, the medical profession requires robust surveillance tools that can effectively identify at-risk patients. Drug-related aberrant behaviors observed in the clinical context may be important indicators of patients at risk for or actively abusing opioids. In this paper, we describe a natural language processing (NLP) method for automatic surveillance of aberrant behavior in medical notes relying only on the text of the notes. This allows for a robust and generalizable system that can be used for high volume analysis of electronic medical records for potential predictors of opioid abuse.}, language = {eng}, journal = {AMIA ... Annual Symposium proceedings. AMIA Symposium}, author = {Lingeman, Jesse M. and Wang, Priscilla and Becker, William and Yu, Hong}, year = {2017}, pmid = {29854186 PMCID: PMC5977697}, pages = {1179--1185}, }
The United States is in the midst of a prescription opioid epidemic, with the number of yearly opioid-related overdose deaths increasing almost fourfold since 2000. To more effectively prevent unintentional opioid overdoses, the medical profession requires robust surveillance tools that can effectively identify at-risk patients. Drug-related aberrant behaviors observed in the clinical context may be important indicators of patients at risk for or actively abusing opioids. In this paper, we describe a natural language processing (NLP) method for automatic surveillance of aberrant behavior in medical notes relying only on the text of the notes. This allows for a robust and generalizable system that can be used for high volume analysis of electronic medical records for potential predictors of opioid abuse.
CIFT: Crowd-Informed Fine-Tuning to Improve Machine Learning Ability.
Lalor, J; Wu, H; and Yu, H
In February 2017.
bibtex abstract
@inproceedings{lalor_cift:_2017, title = {{CIFT}: {Crowd}-{Informed} {Fine}-{Tuning} to {Improve} {Machine} {Learning} {Ability}.}, abstract = {tem Response Theory (IRT) allows for measuring ability of Machine Learning models as compared to a human population. However, it is difficult to create a large dataset to train the ability of deep neural network models (DNNs). We propose Crowd-Informed Fine-Tuning (CIFT) as a new training process, where a pre-trained model is fine-tuned with a specialized supplemental training set obtained via IRT model-fitting on a large set of crowdsourced response patterns. With CIFT we can leverage the specialized set of data obtained through IRT to inform parameter tuning in DNNs. We experiment with two loss functions in CIFT to represent (i) memorization of fine-tuning items and (ii) learning a probability distribution over potential labels that is similar to the crowdsourced distribution over labels to simulate crowd knowledge. Our results show that CIFT improves ability for a state-of-the-art DNN model for Recognizing Textual Entailment (RTE) tasks and is generalizable to a large-scale RTE test set.}, author = {Lalor, J and Wu, H and Yu, H}, month = feb, year = {2017}, }
Item Response Theory (IRT) allows for measuring ability of Machine Learning models as compared to a human population. However, it is difficult to create a large dataset to train the ability of deep neural network models (DNNs). We propose Crowd-Informed Fine-Tuning (CIFT) as a new training process, where a pre-trained model is fine-tuned with a specialized supplemental training set obtained via IRT model-fitting on a large set of crowdsourced response patterns. With CIFT we can leverage the specialized set of data obtained through IRT to inform parameter tuning in DNNs. We experiment with two loss functions in CIFT to represent (i) memorization of fine-tuning items and (ii) learning a probability distribution over potential labels that is similar to the crowdsourced distribution over labels to simulate crowd knowledge. Our results show that CIFT improves ability for a state-of-the-art DNN model for Recognizing Textual Entailment (RTE) tasks and is generalizable to a large-scale RTE test set.
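The two fine-tuning objectives can be sketched in a few lines of PyTorch; the label set and crowd distribution named in the comments are hypothetical examples:

import torch.nn.functional as F

def memorization_loss(logits, hard_labels):
    """(i) plain cross-entropy against single gold labels."""
    return F.cross_entropy(logits, hard_labels)

def crowd_distribution_loss(logits, crowd_dist):
    """(ii) KL divergence to the empirical crowd label distribution,
    e.g. [0.7, 0.2, 0.1] over entailment/neutral/contradiction."""
    return F.kl_div(F.log_softmax(logits, dim=-1), crowd_dist,
                    reduction="batchmean")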
Assessing Electronic Health Record Readability.
Zheng, J; and Yu, H
In 2017.
bibtex
@inproceedings{zheng_assessing_2017, title = {Assessing {Electronic} {Health} {Record} {Readability}.}, author = {Zheng, J and Yu, H}, year = {2017}, }
Reasoning with memory augmented neural networks for language comprehension.
Munkhdalai, T.; and Yu, H.
5th International Conference on Learning Representations (ICLR). 2017.
Paper
bibtex
abstract
@article{munkhdalai_reasoning_2017, title = {Reasoning with memory augmented neural networks for language comprehension.}, url = {https://arxiv.org/abs/1610.06454}, abstract = {Hypothesis testing is an important cognitive process that supports human reasoning. In this paper, we introduce a computational hypothesis testing approach based on memory augmented neural networks. Our approach involves a hypothesis testing loop that reconsiders and progressively refines a previously formed hypothesis in order to generate new hypotheses to test. We apply the proposed approach to language comprehension task by using Neural Semantic Encoders (NSE). Our NSE models achieve the state-of-the-art results showing an absolute improvement of 1.2\% to 2.6\% accuracy over previous results obtained by single and ensemble systems on standard machine comprehension benchmarks such as the Children's Book Test (CBT) and Who-Did-What (WDW) news article datasets.}, urldate = {2017-06-02}, journal = {5th International Conference on Learning Representations (ICLR)}, author = {Munkhdalai, Tsendsuren and Yu, Hong}, year = {2017}, }
Hypothesis testing is an important cognitive process that supports human reasoning. In this paper, we introduce a computational hypothesis testing approach based on memory augmented neural networks. Our approach involves a hypothesis testing loop that reconsiders and progressively refines a previously formed hypothesis in order to generate new hypotheses to test. We apply the proposed approach to language comprehension task by using Neural Semantic Encoders (NSE). Our NSE models achieve the state-of-the-art results showing an absolute improvement of 1.2% to 2.6% accuracy over previous results obtained by single and ensemble systems on standard machine comprehension benchmarks such as the Children's Book Test (CBT) and Who-Did-What (WDW) news article datasets.
Readability Formulas and User Perceptions of Electronic Health Records Difficulty: A Corpus Study.
Zheng, J.; and Yu, H.
Journal of Medical Internet Research, 19(3): e59. 2017.
Paper
doi
bibtex
abstract
@article{zheng_readability_2017, title = {Readability {Formulas} and {User} {Perceptions} of {Electronic} {Health} {Records} {Difficulty}: {A} {Corpus} {Study}}, volume = {19}, copyright = {Unless stated otherwise, all articles are open-access distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work (}, shorttitle = {Readability {Formulas} and {User} {Perceptions} of {Electronic} {Health} {Records} {Difficulty}}, url = {https://www.jmir.org/2017/3/e59/}, doi = {10.2196/jmir.6962}, abstract = {Background: Electronic health records (EHRs) are a rich resource for developing applications to engage patients and foster patient activation, thus holding a strong potential to enhance patient-centered care. Studies have shown that providing patients with access to their own EHR notes may improve the understanding of their own clinical conditions and treatments, leading to improved health care outcomes. However, the highly technical language in EHR notes impedes patients’ comprehension. Numerous studies have evaluated the difficulty of health-related text using readability formulas such as Flesch-Kincaid Grade Level (FKGL), Simple Measure of Gobbledygook (SMOG), and Gunning-Fog Index (GFI). They conclude that the materials are often written at a grade level higher than common recommendations. Objective: The objective of our study was to explore the relationship between the aforementioned readability formulas and the laypeople’s perceived difficulty on 2 genres of text: general health information and EHR notes. We also validated the formulas’ appropriateness and generalizability on predicting difficulty levels of highly complex technical documents. Methods: We collected 140 Wikipedia articles on diabetes and 242 EHR notes with diabetes International Classification of Diseases, Ninth Revision code. We recruited 15 Amazon Mechanical Turk (AMT) users to rate difficulty levels of the documents. Correlations between laypeople’s perceived difficulty levels and readability formula scores were measured, and their difference was tested. We also compared word usage and the impact of medical concepts of the 2 genres of text. Results: The distributions of both readability formulas’ scores (P{\textless}.001) and laypeople’s perceptions (P=.002) on the 2 genres were different. Correlations of readability predictions and laypeople’s perceptions were weak. Furthermore, despite being graded at similar levels, documents of different genres were still perceived with different difficulty (P{\textless}.001). Word usage in the 2 related genres still differed significantly (P{\textless}.001). Conclusions: Our findings suggested that the readability formulas’ predictions did not align with perceived difficulty in either text genre. The widely used readability formulas were highly correlated with each other but did not show adequate correlation with readers’ perceived difficulty. Therefore, they were not appropriate to assess the readability of EHR notes. [J Med Internet Res 2017;19(3):e59]}, language = {en}, number = {3}, urldate = {2017-03-06}, journal = {Journal of Medical Internet Research}, author = {Zheng, Jiaping and Yu, Hong}, year = {2017}, pmid = {28254738 PMCID: PMC5355629}, pages = {e59}, }
Background: Electronic health records (EHRs) are a rich resource for developing applications to engage patients and foster patient activation, thus holding a strong potential to enhance patient-centered care. Studies have shown that providing patients with access to their own EHR notes may improve the understanding of their own clinical conditions and treatments, leading to improved health care outcomes. However, the highly technical language in EHR notes impedes patients’ comprehension. Numerous studies have evaluated the difficulty of health-related text using readability formulas such as Flesch-Kincaid Grade Level (FKGL), Simple Measure of Gobbledygook (SMOG), and Gunning-Fog Index (GFI). They conclude that the materials are often written at a grade level higher than common recommendations. Objective: The objective of our study was to explore the relationship between the aforementioned readability formulas and the laypeople’s perceived difficulty on 2 genres of text: general health information and EHR notes. We also validated the formulas’ appropriateness and generalizability on predicting difficulty levels of highly complex technical documents. Methods: We collected 140 Wikipedia articles on diabetes and 242 EHR notes with diabetes International Classification of Diseases, Ninth Revision code. We recruited 15 Amazon Mechanical Turk (AMT) users to rate difficulty levels of the documents. Correlations between laypeople’s perceived difficulty levels and readability formula scores were measured, and their difference was tested. We also compared word usage and the impact of medical concepts of the 2 genres of text. Results: The distributions of both readability formulas’ scores (P<.001) and laypeople’s perceptions (P=.002) on the 2 genres were different. Correlations of readability predictions and laypeople’s perceptions were weak. Furthermore, despite being graded at similar levels, documents of different genres were still perceived with different difficulty (P<.001). Word usage in the 2 related genres still differed significantly (P<.001). Conclusions: Our findings suggested that the readability formulas’ predictions did not align with perceived difficulty in either text genre. The widely used readability formulas were highly correlated with each other but did not show adequate correlation with readers’ perceived difficulty. Therefore, they were not appropriate to assess the readability of EHR notes. [J Med Internet Res 2017;19(3):e59]
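For reference, the three formulas named above are simple closed-form expressions. The sketch below implements them with a crude vowel-group syllable counter standing in for the dictionary-based counters real tools use:

import re

def syllables(word):
    # Crude heuristic: count vowel groups; real counters use dictionaries.
    return max(1, len(re.findall(r"[aeiouy]+", word.lower())))

def readability(text):
    sents = max(1, len(re.findall(r"[.!?]+", text)))
    words = re.findall(r"[A-Za-z]+", text)
    n = max(1, len(words))
    syl = sum(syllables(w) for w in words)
    poly = sum(1 for w in words if syllables(w) >= 3)  # "complex" words
    return {
        "FKGL": 0.39 * n / sents + 11.8 * syl / n - 15.59,
        "SMOG": 1.0430 * (poly * 30 / sents) ** 0.5 + 3.1291,
        "GFI": 0.4 * (n / sents + 100 * poly / n),
    }

print(readability("The patient presented with dyspnea. Metoprolol was continued."))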
Neural Tree Indexers for Text Understanding.
Munkhdalai, T.; and Yu, H.
In Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers, pages 11–21, Valencia, Spain, April 2017. Association for Computational Linguistics
Paper
bibtex
abstract
@inproceedings{munkhdalai_neural_2017-1, address = {Valencia, Spain}, title = {Neural {Tree} {Indexers} for {Text} {Understanding}}, url = {http://www.aclweb.org/anthology/E17-1002}, abstract = {Recurrent neural networks (RNNs) process input text sequentially and model the conditional transition between word tokens. In contrast, the advantages of recursive networks include that they explicitly model the compositionality and the recursive structure of natural language. However, the current recursive architecture is limited by its dependence on syntactic tree. In this paper, we introduce a robust syntactic parsing-independent tree structured model, Neural Tree Indexers (NTI) that provides a middle ground between the sequential RNNs and the syntactic treebased recursive models. NTI constructs a full n-ary tree by processing the input text with its node function in a bottom-up fashion. Attention mechanism can then be applied to both structure and node function. We implemented and evaluated a binary tree model of NTI, showing the model achieved the state-of-the-art performance on three different NLP tasks: natural language inference, answer sentence selection, and sentence classification, outperforming state-of-the-art recurrent and recursive neural networks.}, urldate = {2017-04-02}, booktitle = {Proceedings of the 15th {Conference} of the {European} {Chapter} of the {Association} for {Computational} {Linguistics}: {Volume} 1, {Long} {Papers}}, publisher = {Association for Computational Linguistics}, author = {Munkhdalai, Tsendsuren and Yu, Hong}, month = apr, year = {2017}, pages = {11--21}, }
Recurrent neural networks (RNNs) process input text sequentially and model the conditional transition between word tokens. In contrast, the advantages of recursive networks include that they explicitly model the compositionality and the recursive structure of natural language. However, the current recursive architecture is limited by its dependence on syntactic tree. In this paper, we introduce a robust syntactic parsing-independent tree structured model, Neural Tree Indexers (NTI) that provides a middle ground between the sequential RNNs and the syntactic tree-based recursive models. NTI constructs a full n-ary tree by processing the input text with its node function in a bottom-up fashion. Attention mechanism can then be applied to both structure and node function. We implemented and evaluated a binary tree model of NTI, showing the model achieved the state-of-the-art performance on three different NLP tasks: natural language inference, answer sentence selection, and sentence classification, outperforming state-of-the-art recurrent and recursive neural networks.
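A bare-bones version of that bottom-up binary-tree composition, with the paper's attention and gating omitted, can be sketched in PyTorch:

import torch
import torch.nn as nn

class BinaryTreeComposer(nn.Module):
    """Bottom-up full binary-tree composition of a token sequence."""
    def __init__(self, dim):
        super().__init__()
        self.node_fn = nn.Sequential(nn.Linear(2 * dim, dim), nn.Tanh())

    def forward(self, leaves):
        # leaves: (seq_len, dim) token embeddings, seq_len padded to 2**k.
        nodes = leaves
        while nodes.size(0) > 1:
            nodes = self.node_fn(torch.cat([nodes[0::2], nodes[1::2]], dim=-1))
        return nodes[0]  # root vector representing the whole sequence

root = BinaryTreeComposer(dim=64)(torch.randn(8, 64))  # 8 tokens -> 1 root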
Generating a Test of Electronic Health Record Narrative Comprehension with Item Response Theory.
Lalor, J; Wu, H; Chen, L; Mazor, K; and Yu, H
In November 2017.
bibtex abstract
@inproceedings{lalor_generating_2017, title = {Generating a {Test} of {Electronic} {Health} {Record} {Narrative} {Comprehension} with {Item} {Response} {Theory}.}, abstract = {In this work, we report the development of a new instrument to test patients' ability to comprehend EHR notes. Our instrument comprises of a test set of question and answer pairs that are based on the semantic content of EHR notes and selected using the psychometrics method Item Response Theory.}, author = {Lalor, J and Wu, H and Chen, L and Mazor, K and Yu, H}, month = nov, year = {2017}, }
In this work, we report the development of a new instrument to test patients' ability to comprehend EHR notes. Our instrument comprises of a test set of question and answer pairs that are based on the semantic content of EHR notes and selected using the psychometrics method Item Response Theory.
An Analysis of Ability in Deep Neural Networks.
Lalor, J. P.; Wu, H.; Munkhdalai, T.; and Yu, H.
arXiv preprint arXiv:1702.04811. 2017.
bibtex
@article{lalor_analysis_2017, title = {An {Analysis} of {Ability} in {Deep} {Neural} {Networks}}, journal = {arXiv preprint arXiv:1702.04811}, author = {Lalor, John P. and Wu, Hao and Munkhdalai, Tsendsuren and Yu, Hong}, year = {2017}, }
Ranking Medical Terms to Support Expansion of Lay Language Resources for Patient Comprehension of Electronic Health Record Notes: Adapted Distant Supervision Approach.
Chen, J.; Jagannatha, A. N.; Fodeh, S. J.; and Yu, H.
JMIR medical informatics, 5(4): e42. October 2017.
doi bibtex abstract
@article{chen_ranking_2017, title = {Ranking {Medical} {Terms} to {Support} {Expansion} of {Lay} {Language} {Resources} for {Patient} {Comprehension} of {Electronic} {Health} {Record} {Notes}: {Adapted} {Distant} {Supervision} {Approach}}, volume = {5}, issn = {2291-9694}, shorttitle = {Ranking {Medical} {Terms} to {Support} {Expansion} of {Lay} {Language} {Resources} for {Patient} {Comprehension} of {Electronic} {Health} {Record} {Notes}}, doi = {10.2196/medinform.8531}, abstract = {BACKGROUND: Medical terms are a major obstacle for patients to comprehend their electronic health record (EHR) notes. Clinical natural language processing (NLP) systems that link EHR terms to lay terms or definitions allow patients to easily access helpful information when reading through their EHR notes, and have shown to improve patient EHR comprehension. However, high-quality lay language resources for EHR terms are very limited in the public domain. Because expanding and curating such a resource is a costly process, it is beneficial and even necessary to identify terms important for patient EHR comprehension first. OBJECTIVE: We aimed to develop an NLP system, called adapted distant supervision (ADS), to rank candidate terms mined from EHR corpora. We will give EHR terms ranked as high by ADS a higher priority for lay language annotation-that is, creating lay definitions for these terms. METHODS: Adapted distant supervision uses distant supervision from consumer health vocabulary and transfer learning to adapt itself to solve the problem of ranking EHR terms in the target domain. We investigated 2 state-of-the-art transfer learning algorithms (ie, feature space augmentation and supervised distant supervision) and designed 5 types of learning features, including distributed word representations learned from large EHR data for ADS. For evaluating ADS, we asked domain experts to annotate 6038 candidate terms as important or nonimportant for EHR comprehension. We then randomly divided these data into the target-domain training data (1000 examples) and the evaluation data (5038 examples). We compared ADS with 2 strong baselines, including standard supervised learning, on the evaluation data. RESULTS: The ADS system using feature space augmentation achieved the best average precision, 0.850, on the evaluation set when using 1000 target-domain training examples. The ADS system using supervised distant supervision achieved the best average precision, 0.819, on the evaluation set when using only 100 target-domain training examples. The 2 ADS systems both performed significantly better than the baseline systems (P{\textless}.001 for all measures and all conditions). Using a rich set of learning features contributed to ADS's performance substantially. CONCLUSIONS: ADS can effectively rank terms mined from EHRs. Transfer learning improved ADS's performance even with a small number of target-domain training examples. EHR terms prioritized by ADS were used to expand a lay language resource that supports patient EHR comprehension. The top 10,000 EHR terms ranked by ADS are available upon request.}, language = {eng}, number = {4}, journal = {JMIR medical informatics}, author = {Chen, Jinying and Jagannatha, Abhyuday N. and Fodeh, Samah J. and Yu, Hong}, month = oct, year = {2017}, pmid = {29089288}, pmcid = {PMC5686421}, keywords = {Information extraction, electronic health records, lexical entry selection, natural language processing, transfer learning}, pages = {e42}, }
BACKGROUND: Medical terms are a major obstacle for patients to comprehend their electronic health record (EHR) notes. Clinical natural language processing (NLP) systems that link EHR terms to lay terms or definitions allow patients to easily access helpful information when reading through their EHR notes, and have shown to improve patient EHR comprehension. However, high-quality lay language resources for EHR terms are very limited in the public domain. Because expanding and curating such a resource is a costly process, it is beneficial and even necessary to identify terms important for patient EHR comprehension first. OBJECTIVE: We aimed to develop an NLP system, called adapted distant supervision (ADS), to rank candidate terms mined from EHR corpora. We will give EHR terms ranked as high by ADS a higher priority for lay language annotation-that is, creating lay definitions for these terms. METHODS: Adapted distant supervision uses distant supervision from consumer health vocabulary and transfer learning to adapt itself to solve the problem of ranking EHR terms in the target domain. We investigated 2 state-of-the-art transfer learning algorithms (ie, feature space augmentation and supervised distant supervision) and designed 5 types of learning features, including distributed word representations learned from large EHR data for ADS. For evaluating ADS, we asked domain experts to annotate 6038 candidate terms as important or nonimportant for EHR comprehension. We then randomly divided these data into the target-domain training data (1000 examples) and the evaluation data (5038 examples). We compared ADS with 2 strong baselines, including standard supervised learning, on the evaluation data. RESULTS: The ADS system using feature space augmentation achieved the best average precision, 0.850, on the evaluation set when using 1000 target-domain training examples. The ADS system using supervised distant supervision achieved the best average precision, 0.819, on the evaluation set when using only 100 target-domain training examples. The 2 ADS systems both performed significantly better than the baseline systems (P<.001 for all measures and all conditions). Using a rich set of learning features contributed to ADS's performance substantially. CONCLUSIONS: ADS can effectively rank terms mined from EHRs. Transfer learning improved ADS's performance even with a small number of target-domain training examples. EHR terms prioritized by ADS were used to expand a lay language resource that supports patient EHR comprehension. The top 10,000 EHR terms ranked by ADS are available upon request.
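Of the two transfer methods, feature space augmentation is the easier to picture. In the style of Daumé III's "frustratingly easy" domain adaptation, each feature vector is copied into shared and domain-specific blocks; this is a sketch of the general idea, not necessarily the exact variant used in the paper:

import numpy as np

def augment(x, domain):
    """Copy a feature vector into [shared, source-only, target-only] blocks."""
    zeros = np.zeros_like(x)
    if domain == "source":
        return np.concatenate([x, x, zeros])
    return np.concatenate([x, zeros, x])

x = np.array([1.0, 0.5, 0.0])
print(augment(x, "source"))  # shared and source blocks active
print(augment(x, "target"))  # shared and target blocks active

A standard linear learner trained on the augmented vectors can then weight each feature differently in the shared and domain-specific blocks.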
Improving Machine Learning Ability with Fine-Tuning.
Lalor, J.; Wu, H.; and Yu, H.
In ICML, 2017.
bibtex
@inproceedings{lalor_improving_2017, title = {Improving {Machine} {Learning} {Ability} with {Fine}-{Tuning}}, booktitle = {{ICML}}, author = {Lalor, John and Wu, Hao and Yu, Hong}, year = {2017}, }
An Analysis of Machine Learning Intelligence.
Lalor, J. P.; Wu, H.; Munkhdalai, T.; and Yu, H.
arXiv:1702.04811 [cs]. February 2017.
arXiv: 1702.04811
Paper
bibtex
abstract
@article{lalor_analysis_2017-1, title = {An {Analysis} of {Machine} {Learning} {Intelligence}}, url = {http://arxiv.org/abs/1702.04811}, abstract = {Deep neural networks (DNNs) have set state of the art results in many machine learning and NLP tasks. However, we do not have a strong understanding of what DNN models learn. In this paper, we examine learning in DNNs through analysis of their outputs. We compare DNN performance directly to a human population, and use characteristics of individual data points such as difficulty to see how well models perform on easy and hard examples. We investigate how training size and the incorporation of noise affect a DNN's ability to generalize and learn. Our experiments show that unlike traditional machine learning models (e.g., Naive Bayes, Decision Trees), DNNs exhibit human-like learning properties. As they are trained with more data, they are more able to distinguish between easy and difficult items, and performance on easy items improves at a higher rate than difficult items. We find that different DNN models exhibit different strengths in learning and are robust to noise in training data.}, urldate = {2017-02-26}, journal = {arXiv:1702.04811 [cs]}, author = {Lalor, John P. and Wu, Hao and Munkhdalai, Tsendsuren and Yu, Hong}, month = feb, year = {2017}, note = {arXiv: 1702.04811}, keywords = {Computer Science - Computation and Language}, }
Deep neural networks (DNNs) have set state of the art results in many machine learning and NLP tasks. However, we do not have a strong understanding of what DNN models learn. In this paper, we examine learning in DNNs through analysis of their outputs. We compare DNN performance directly to a human population, and use characteristics of individual data points such as difficulty to see how well models perform on easy and hard examples. We investigate how training size and the incorporation of noise affect a DNN's ability to generalize and learn. Our experiments show that unlike traditional machine learning models (e.g., Naive Bayes, Decision Trees), DNNs exhibit human-like learning properties. As they are trained with more data, they are more able to distinguish between easy and difficult items, and performance on easy items improves at a higher rate than difficult items. We find that different DNN models exhibit different strengths in learning and are robust to noise in training data.
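One simple way to run this kind of analysis, sketched below with hypothetical variable names, is to bin test items by an independently estimated difficulty and compare the model's accuracy across bins:

import numpy as np

def accuracy_by_difficulty(correct, difficulty, n_bins=5):
    """correct: 0/1 model outcomes per test item; difficulty: per-item
    difficulty (e.g. from an IRT fit). Returns mean accuracy per bin."""
    edges = np.quantile(difficulty, np.linspace(0, 1, n_bins + 1))
    bins = np.clip(np.digitize(difficulty, edges[1:-1]), 0, n_bins - 1)
    return [float(correct[bins == b].mean()) for b in range(n_bins)]

A human-like learner should show accuracy falling monotonically from the easiest bin to the hardest.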
2016
(9)
Structured prediction models for RNN based sequence labeling in clinical text.
Jagannatha, A. N.; and Yu, H.
In Proceedings of the Conference on Empirical Methods in Natural Language Processing, volume 2016, pages 856–865, November 2016.
bibtex abstract
@inproceedings{jagannatha_structured_2016, title = {Structured prediction models for {RNN} based sequence labeling in clinical text}, volume = {2016}, abstract = {Sequence labeling is a widely used method for named entity recognition and information extraction from unstructured natural language data. In clinical domain one major application of sequence labeling involves extraction of medical entities such as medication, indication, and side-effects from Electronic Health Record narratives. Sequence labeling in this domain, presents its own set of challenges and objectives. In this work we experimented with various CRF based structured learning models with Recurrent Neural Networks. We extend the previously studied LSTM-CRF models with explicit modeling of pairwise potentials. We also propose an approximate version of skip-chain CRF inference with RNN potentials. We use these methodologies for structured prediction in order to improve the exact phrase detection of various medical entities.}, language = {eng}, booktitle = {Proceedings of the {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing}}, author = {Jagannatha, Abhyuday N. and Yu, Hong}, month = nov, year = {2016}, pmid = {28004040 PMCID: PMC5167535}, keywords = {Computer Science - Computation and Language}, pages = {856--865}, }
Sequence labeling is a widely used method for named entity recognition and information extraction from unstructured natural language data. In the clinical domain, one major application of sequence labeling involves extraction of medical entities such as medication, indication, and side effects from Electronic Health Record narratives. Sequence labeling in this domain presents its own set of challenges and objectives. In this work we experimented with various CRF-based structured learning models with recurrent neural networks. We extend the previously studied LSTM-CRF models with explicit modeling of pairwise potentials. We also propose an approximate version of skip-chain CRF inference with RNN potentials. We use these methodologies for structured prediction in order to improve the exact phrase detection of various medical entities.
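At inference time, the pairwise potentials mentioned above reduce to Viterbi decoding over emission and transition scores. A minimal NumPy sketch of that decoding step (illustrative only, not the authors' model):

import numpy as np

def viterbi(emissions, transitions):
    """emissions: (T, K) per-token label scores; transitions: (K, K) pairwise scores."""
    T, K = emissions.shape
    score = emissions[0].copy()
    back = np.zeros((T, K), dtype=int)
    for t in range(1, T):
        # total[i, j] = best score ending at label i, then moving to label j.
        total = score[:, None] + transitions + emissions[t][None, :]
        back[t] = total.argmax(axis=0)
        score = total.max(axis=0)
    path = [int(score.argmax())]
    for t in range(T - 1, 0, -1):
        path.append(int(back[t][path[-1]]))
    return path[::-1]

# Toy usage: 4 tokens, 3 labels (e.g., O, B-Medication, I-Medication).
print(viterbi(np.random.rand(4, 3), np.random.rand(3, 3)))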
RETAIN: An Interpretable Predictive Model for Healthcare using Reverse Time Attention Mechanism.
Choi, E.; Bahadori, M. T.; Sun, J.; Kulas, J.; Schuetz, A.; and Stewart, W.
In Advances in Neural Information Processing Systems, pages 3504–3512, 2016.
Paper
bibtex
@inproceedings{choi_retain:_2016, title = {{RETAIN}: {An} {Interpretable} {Predictive} {Model} for {Healthcare} using {Reverse} {Time} {Attention} {Mechanism}}, shorttitle = {{RETAIN}}, url = {http://papers.nips.cc/paper/6321-retain-an-interpretable-predictive-model-for-healthcare-using-reverse-time-attention-mechanism}, urldate = {2017-01-12}, booktitle = {Advances in {Neural} {Information} {Processing} {Systems}}, author = {Choi, Edward and Bahadori, Mohammad Taha and Sun, Jimeng and Kulas, Joshua and Schuetz, Andy and Stewart, Walter}, year = {2016}, pages = {3504--3512}, }
Learning to Rank Scientific Documents from the Crowd.
Lingeman, J. M; and Yu, H.
arXiv:1611.01400. November 2016.
Paper
bibtex
abstract
@article{lingeman_learning_2016, title = {Learning to {Rank} {Scientific} {Documents} from the {Crowd}}, url = {https://arxiv.org/pdf/1611.01400v1.pdf}, abstract = {Finding related published articles is an important task in any science, but with the explosion of new work in the biomedical domain it has become especially challenging. Most existing methodologies use text similarity metrics to identify whether two articles are related or not. However biomedical knowledge discovery is hypothesis-driven. The most related articles may not be ones with the highest text similarities. In this study, we first develop an innovative crowd-sourcing approach to build an expert-annotated document-ranking corpus. Using this corpus as the gold standard, we then evaluate the approaches of using text similarity to rank the relatedness of articles. Finally, we develop and evaluate a new supervised model to automatically rank related scientific articles. Our results show that authors' ranking differ significantly from rankings by text-similarity-based models. By training a learning-to-rank model on a subset of the annotated corpus, we found the best supervised learning-to-rank model (SVM-Rank) significantly surpassed state-of-the-art baseline systems.}, journal = {arXiv:1611.01400}, author = {Lingeman, Jesse M and Yu, Hong}, month = nov, year = {2016}, }
Finding related published articles is an important task in any science, but with the explosion of new work in the biomedical domain it has become especially challenging. Most existing methodologies use text similarity metrics to identify whether two articles are related or not. However, biomedical knowledge discovery is hypothesis-driven. The most related articles may not be ones with the highest text similarities. In this study, we first develop an innovative crowd-sourcing approach to build an expert-annotated document-ranking corpus. Using this corpus as the gold standard, we then evaluate the approaches of using text similarity to rank the relatedness of articles. Finally, we develop and evaluate a new supervised model to automatically rank related scientific articles. Our results show that authors' rankings differ significantly from rankings by text-similarity-based models. By training a learning-to-rank model on a subset of the annotated corpus, we found the best supervised learning-to-rank model (SVM-Rank) significantly surpassed state-of-the-art baseline systems.
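For readers unfamiliar with SVM-Rank, the pairwise reduction it relies on can be sketched in a few lines; the features, relevance grades, and use of scikit-learn's LinearSVC here are stand-ins, not the paper's setup:

import numpy as np
from sklearn.svm import LinearSVC

def to_pairs(X, y):
    # Each pair with different relevance becomes one classification
    # example on the difference vector, in both orientations.
    Xp, yp = [], []
    for i in range(len(y)):
        for j in range(len(y)):
            if y[i] > y[j]:
                Xp.append(X[i] - X[j]); yp.append(1)
                Xp.append(X[j] - X[i]); yp.append(-1)
    return np.array(Xp), np.array(yp)

X = np.random.rand(6, 4)          # toy feature vectors for 6 articles
y = np.array([2, 1, 0, 2, 1, 0])  # toy relevance grades
Xp, yp = to_pairs(X, y)
model = LinearSVC().fit(Xp, yp)
scores = X @ model.coef_.ravel()  # rank articles by learned score
print(np.argsort(-scores))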
Learning for Biomedical Information Extraction: Methodological Review of Recent Advances.
Liu, F.; Chen, J.; Jagannatha, A.; and Yu, H.
arXiv:1606.07993. June 2016.
Paper
bibtex
abstract
@article{liu_learning_2016, title = {Learning for {Biomedical} {Information} {Extraction}: {Methodological} {Review} of {Recent} {Advances}}, url = {https://arxiv.org/ftp/arxiv/papers/1606/1606.07993.pdf}, abstract = {Biomedical information extraction (BioIE) is important to many applications, including clinical decision support, integrative biology, and pharmacovigilance, and therefore it has been an active research. Unlike existing reviews covering a holistic view on BioIE, this review focuses on mainly recent advances in learning based approaches, by systematically summarizing them into different aspects of methodological development. In addition, we dive into open information extraction and deep learning, two emerging and influential techniques and envision next generation of BioIE.}, journal = {arXiv:1606.07993}, author = {Liu, Feifan and Chen, Jinying and Jagannatha, Abhyuday and Yu, Hong}, month = jun, year = {2016}, }
Biomedical information extraction (BioIE) is important to many applications, including clinical decision support, integrative biology, and pharmacovigilance, and therefore it has been an active research area. Unlike existing reviews covering a holistic view of BioIE, this review focuses mainly on recent advances in learning-based approaches, systematically summarizing them into different aspects of methodological development. In addition, we dive into open information extraction and deep learning, two emerging and influential techniques, and envision the next generation of BioIE.
Citation Analysis with Neural Attention Models.
Munkhdalai, M; Lalor, J; and Yu, H
In Proceedings of the Seventh International Workshop on Health Text Mining and Information Analysis (LOUHI), pages 69–77, Austin, TX, November 2016. Association for Computational Linguistics
Paper
doi
bibtex
@inproceedings{munkhdalai_citation_2016, address = {Austin, TX}, title = {Citation {Analysis} with {Neural} {Attention} {Models}}, url = {http://www.aclweb.org/anthology/W/W16/W16-6109.pdf}, doi = {10.18653/v1/W16-6109}, booktitle = {Proceedings of the {Seventh} {International} {Workshop} on {Health} {Text} {Mining} and {Information} {Analysis} ({LOUHI}) ,}, publisher = {Association for Computational Linguistics}, author = {Munkhdalai, M and Lalor, J and Yu, H}, month = nov, year = {2016}, pages = {69--77}, }
Condensed Memory Networks for Clinical Diagnostic Inferencing.
Prakash, A.; Zhao, S.; Hasan, S. A.; Datla, V.; Lee, K.; Qadir, A.; Liu, J.; and Farri, O.
arXiv:1612.01848 [cs]. December 2016.
arXiv: 1612.01848
Paper
bibtex
abstract
@article{prakash_condensed_2016, title = {Condensed {Memory} {Networks} for {Clinical} {Diagnostic} {Inferencing}}, url = {http://arxiv.org/abs/1612.01848}, abstract = {Diagnosis of a clinical condition is a challenging task, which often requires significant medical investigation. Previous work related to diagnostic inferencing problems mostly consider multivariate observational data (e.g. physiological signals, lab tests etc.). In contrast, we explore the problem using free-text medical notes recorded in an electronic health record (EHR). Complex tasks like these can benefit from structured knowledge bases, but those are not scalable. We instead exploit raw text from Wikipedia as a knowledge source. Memory networks have been demonstrated to be effective in tasks which require comprehension of free-form text. They use the final iteration of the learned representation to predict probable classes. We introduce condensed memory neural networks (C-MemNNs), a novel model with iterative condensation of memory representations that preserves the hierarchy of features in the memory. Experiments on the MIMIC-III dataset show that the proposed model outperforms other variants of memory networks to predict the most probable diagnoses given a complex clinical scenario.}, urldate = {2017-01-12}, journal = {arXiv:1612.01848 [cs]}, author = {Prakash, Aaditya and Zhao, Siyuan and Hasan, Sadid A. and Datla, Vivek and Lee, Kathy and Qadir, Ashequl and Liu, Joey and Farri, Oladimeji}, month = dec, year = {2016}, note = {arXiv: 1612.01848}, keywords = {Computer Science - Computation and Language}, }
Diagnosis of a clinical condition is a challenging task, which often requires significant medical investigation. Previous work related to diagnostic inferencing problems mostly considers multivariate observational data (e.g. physiological signals, lab tests etc.). In contrast, we explore the problem using free-text medical notes recorded in an electronic health record (EHR). Complex tasks like these can benefit from structured knowledge bases, but those are not scalable. We instead exploit raw text from Wikipedia as a knowledge source. Memory networks have been demonstrated to be effective in tasks which require comprehension of free-form text. They use the final iteration of the learned representation to predict probable classes. We introduce condensed memory neural networks (C-MemNNs), a novel model with iterative condensation of memory representations that preserves the hierarchy of features in the memory. Experiments on the MIMIC-III dataset show that the proposed model outperforms other variants of memory networks to predict the most probable diagnoses given a complex clinical scenario.
Finding Important Terms for Patients in Their Electronic Health Records: A Learning-to-Rank Approach Using Expert Annotations.
Chen, J.; Zheng, J.; and Yu, H.
JMIR Medical Informatics, 4(4): e40. November 2016.
doi bibtex abstract
@article{chen_finding_2016, title = {Finding {Important} {Terms} for {Patients} in {Their} {Electronic} {Health} {Records}: {A} {Learning}-to-{Rank} {Approach} {Using} {Expert} {Annotations}}, volume = {4}, shorttitle = {Finding {Important} {Terms} for {Patients} in {Their} {Electronic} {Health} {Records}}, doi = {10.2196/medinform.6373}, abstract = {BACKGROUND: Many health organizations allow patients to access their own electronic health record (EHR) notes through online patient portals as a way to enhance patient-centered care. However, EHR notes are typically long and contain abundant medical jargon that can be difficult for patients to understand. In addition, many medical terms in patients' notes are not directly related to their health care needs. One way to help patients better comprehend their own notes is to reduce information overload and help them focus on medical terms that matter most to them. Interventions can then be developed by giving them targeted education to improve their EHR comprehension and the quality of care. OBJECTIVE: We aimed to develop a supervised natural language processing (NLP) system called Finding impOrtant medical Concepts most Useful to patientS (FOCUS) that automatically identifies and ranks medical terms in EHR notes based on their importance to the patients. METHODS: First, we built an expert-annotated corpus. For each EHR note, 2 physicians independently identified medical terms important to the patient. Using the physicians' agreement as the gold standard, we developed and evaluated FOCUS. FOCUS first identifies candidate terms from each EHR note using MetaMap and then ranks the terms using a support vector machine-based learn-to-rank algorithm. We explored rich learning features, including distributed word representation, Unified Medical Language System semantic type, topic features, and features derived from consumer health vocabulary. We compared FOCUS with 2 strong baseline NLP systems. RESULTS: Physicians annotated 90 EHR notes and identified a mean of 9 (SD 5) important terms per note. The Cohen's kappa annotation agreement was .51. The 10-fold cross-validation results show that FOCUS achieved an area under the receiver operating characteristic curve (AUC-ROC) of 0.940 for ranking candidate terms from EHR notes to identify important terms. When including term identification, the performance of FOCUS for identifying important terms from EHR notes was 0.866 AUC-ROC. Both performance scores significantly exceeded the corresponding baseline system scores (P{\textless}.001). Rich learning features contributed to FOCUS's performance substantially. CONCLUSIONS: FOCUS can automatically rank terms from EHR notes based on their importance to patients. It may help develop future interventions that improve quality of care.}, language = {eng}, number = {4}, journal = {JMIR medical informatics}, author = {Chen, Jinying and Zheng, Jiaping and Yu, Hong}, month = nov, year = {2016}, pmid = {27903489}, pmcid = {PMC5156821}, keywords = {Information extraction, Learning to rank, Supervised learning, electronic health records, natural language processing}, pages = {e40}, }
BACKGROUND: Many health organizations allow patients to access their own electronic health record (EHR) notes through online patient portals as a way to enhance patient-centered care. However, EHR notes are typically long and contain abundant medical jargon that can be difficult for patients to understand. In addition, many medical terms in patients' notes are not directly related to their health care needs. One way to help patients better comprehend their own notes is to reduce information overload and help them focus on medical terms that matter most to them. Interventions can then be developed by giving them targeted education to improve their EHR comprehension and the quality of care. OBJECTIVE: We aimed to develop a supervised natural language processing (NLP) system called Finding impOrtant medical Concepts most Useful to patientS (FOCUS) that automatically identifies and ranks medical terms in EHR notes based on their importance to the patients. METHODS: First, we built an expert-annotated corpus. For each EHR note, 2 physicians independently identified medical terms important to the patient. Using the physicians' agreement as the gold standard, we developed and evaluated FOCUS. FOCUS first identifies candidate terms from each EHR note using MetaMap and then ranks the terms using a support vector machine-based learn-to-rank algorithm. We explored rich learning features, including distributed word representation, Unified Medical Language System semantic type, topic features, and features derived from consumer health vocabulary. We compared FOCUS with 2 strong baseline NLP systems. RESULTS: Physicians annotated 90 EHR notes and identified a mean of 9 (SD 5) important terms per note. The Cohen's kappa annotation agreement was .51. The 10-fold cross-validation results show that FOCUS achieved an area under the receiver operating characteristic curve (AUC-ROC) of 0.940 for ranking candidate terms from EHR notes to identify important terms. When including term identification, the performance of FOCUS for identifying important terms from EHR notes was 0.866 AUC-ROC. Both performance scores significantly exceeded the corresponding baseline system scores (P<.001). Rich learning features contributed to FOCUS's performance substantially. CONCLUSIONS: FOCUS can automatically rank terms from EHR notes based on their importance to patients. It may help develop future interventions that improve quality of care.
EHR Note Paraphrasing for NoteAid Evaluation.
Yu, H.
In SBM, 2016.
bibtex
@inproceedings{yu_ehr_2016, title = {{EHR} {Note} {Paraphrasing} for {NoteAid} {Evaluation}.}, booktitle = {{SBM}}, author = {Yu, Hong}, year = {2016}, }
Mismatch between Patient Information-Seeking and Physician Expectation at a Diabetes Outpatient Clinic.
Yu, H.; Makkapati, S.; Maranda, L.; and Malkani, S.
In SBM, 2016.
bibtex
@inproceedings{yu_mismatch_2016, title = {Mismatch between {Patient} {Information}-{Seeking} and {Physician} {Expectation} at a {Diabetes} {Outpatient} {Clinic}.}, booktitle = {{SBM}}, author = {Yu, Hong and Makkapati, Shreya and Maranda, Louis and Malkani, Samir}, year = {2016}, }
2015
(8)
Translating Electronic Health Record Notes from English to Spanish: A Preliminary Study.
Liu, W.; Cai, S.; Balaji, R.; Chiriboga, G.; Knight, K.; and Yu, H.
In ACL-IJCNLP, page 134, Beijing, China, July 2015.
Paper
doi
bibtex
@inproceedings{liu_translating_2015, address = {Bei Jing, China}, title = {Translating {Electronic} {Health} {Record} {Notes} from {English} to {Spanish}: {A} {Preliminary} {Study}}, url = {http://aclweb.org/anthology/W/W15/W15-3816.pdf}, doi = {10.18653/v1/W15-3816}, booktitle = {{ACL}-{IJCNLP}}, author = {Liu, Weisong and Cai, Shu and Balaji, Ramesh and Chiriboga, German and Knight, Kevin and Yu, Hong}, month = jul, year = {2015}, pages = {134}, }
Figure-Associated Text Summarization and Evaluation.
Polepalli Ramesh, B.; Sethi, R. J.; and Yu, H.
PLOS ONE, 10(2): e0115671. February 2015.
Paper
doi
bibtex
@article{polepalli_ramesh_figure-associated_2015, title = {Figure-{Associated} {Text} {Summarization} and {Evaluation}}, volume = {10}, issn = {1932-6203}, url = {http://dx.plos.org/10.1371/journal.pone.0115671}, doi = {10.1371/journal.pone.0115671}, language = {en}, number = {2}, urldate = {2015-02-26}, journal = {PLOS ONE}, author = {Polepalli Ramesh, Balaji and Sethi, Ricky J. and Yu, Hong}, editor = {Sarkar, Indra Neil}, month = feb, year = {2015}, pmid = {25643357 PMCID: PMC4313946}, pages = {e0115671}, }
DeTEXT: A Database for Evaluating Text Extraction from Biomedical Literature Figures.
Yin, X.; Yang, C.; Pei, W.; Man, H.; Zhang, J.; Learned-Miller, E.; and Yu, H.
PLoS ONE, 10(5). May 2015.
Paper
doi
bibtex
abstract
@article{yin_detext:_2015, title = {{DeTEXT}: {A} {Database} for {Evaluating} {Text} {Extraction} from {Biomedical} {Literature} {Figures}}, volume = {10}, issn = {1932-6203}, shorttitle = {{DeTEXT}}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4423993/}, doi = {10.1371/journal.pone.0126200}, abstract = {Hundreds of millions of figures are available in biomedical literature, representing important biomedical experimental evidence. Since text is a rich source of information in figures, automatically extracting such text may assist in the task of mining figure information. A high-quality ground truth standard can greatly facilitate the development of an automated system. This article describes DeTEXT: A database for evaluating text extraction from biomedical literature figures. It is the first publicly available, human-annotated, high quality, and large-scale figure-text dataset with 288 full-text articles, 500 biomedical figures, and 9308 text regions. This article describes how figures were selected from open-access full-text biomedical articles and how annotation guidelines and annotation tools were developed. We also discuss the inter-annotator agreement and the reliability of the annotations. We summarize the statistics of the DeTEXT data and make available evaluation protocols for DeTEXT. Finally we lay out challenges we observed in the automated detection and recognition of figure text and discuss research directions in this area. DeTEXT is publicly available for downloading at http://prir.ustb.edu.cn/DeTEXT/.}, number = {5}, urldate = {2015-06-03}, journal = {PLoS ONE}, author = {Yin, Xu-Cheng and Yang, Chun and Pei, Wei-Yi and Man, Haixia and Zhang, Jun and Learned-Miller, Erik and Yu, Hong}, month = may, year = {2015}, pmid = {25951377 PMCID: PMC4423993}, }
Hundreds of millions of figures are available in biomedical literature, representing important biomedical experimental evidence. Since text is a rich source of information in figures, automatically extracting such text may assist in the task of mining figure information. A high-quality ground truth standard can greatly facilitate the development of an automated system. This article describes DeTEXT: A database for evaluating text extraction from biomedical literature figures. It is the first publicly available, human-annotated, high quality, and large-scale figure-text dataset with 288 full-text articles, 500 biomedical figures, and 9308 text regions. This article describes how figures were selected from open-access full-text biomedical articles and how annotation guidelines and annotation tools were developed. We also discuss the inter-annotator agreement and the reliability of the annotations. We summarize the statistics of the DeTEXT data and make available evaluation protocols for DeTEXT. Finally we lay out challenges we observed in the automated detection and recognition of figure text and discuss research directions in this area. DeTEXT is publicly available for downloading at http://prir.ustb.edu.cn/DeTEXT/.
Methods for Linking EHR Notes to Education Materials.
Zheng, J.; and Yu, H.
AMIA Joint Summits on Translational Science Proceedings, 2015: 209–215. 2015.
bibtex abstract
@article{zheng_methods_2015, title = {Methods for {Linking} {EHR} {Notes} to {Education} {Materials}}, volume = {2015}, issn = {2153-4063}, abstract = {It has been shown that providing patients with access to their own electronic health records (EHR) can enhance their medical understanding and provide clinically relevant benefits. However, languages that are difficult for non-medical professionals to comprehend are prevalent in the EHR notes, including medical terms, abbreviations, and domain-specific language patterns. Furthermore, limited average health literacy forms a barrier for patients to understand their health condition, impeding their ability to actively participate in managing their health. Therefore, we are developing a system to retrieve EHR note-tailored online consumer-oriented health education materials to improve patients' health knowledge of their own clinical conditions. Our experiments show that queries combining key concepts and other medical concepts present in the EHR notes significantly outperform (more than doubled) a baseline system of using the phrases from topic models.}, language = {eng}, journal = {AMIA Joint Summits on Translational Science proceedings AMIA Summit on Translational Science}, author = {Zheng, Jiaping and Yu, Hong}, year = {2015}, pmid = {26306273}, pmcid = {PMC4525231}, pages = {209--215}, }
It has been shown that providing patients with access to their own electronic health records (EHR) can enhance their medical understanding and provide clinically relevant benefits. However, language that is difficult for non-medical professionals to comprehend is prevalent in EHR notes, including medical terms, abbreviations, and domain-specific language patterns. Furthermore, limited average health literacy forms a barrier for patients to understand their health condition, impeding their ability to actively participate in managing their health. Therefore, we are developing a system to retrieve EHR note-tailored online consumer-oriented health education materials to improve patients' health knowledge of their own clinical conditions. Our experiments show that queries combining key concepts and other medical concepts present in the EHR notes significantly outperform (more than doubling performance over) a baseline system that uses phrases from topic models.
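A highly simplified version of the query-based retrieval idea described above, using TF-IDF cosine similarity over invented education documents and note concepts (not the authors' system):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

docs = ["managing type 2 diabetes with insulin",
        "understanding hypertension and blood pressure",
        "insulin dosing and blood glucose monitoring"]
query = " ".join(["diabetes", "insulin", "glucose"])  # concepts from a note

vec = TfidfVectorizer().fit(docs)
sims = cosine_similarity(vec.transform([query]), vec.transform(docs))[0]
print(sorted(zip(sims, docs), reverse=True)[0])  # best-matching material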
Identifying Key Concepts from EHR Notes Using Domain Adaptation.
Zheng, J.; Yu, H.; and Bedford, M. A.
In Sixth International Workshop on Health Text Mining and Information Analysis (LOUHI), page 115, 2015.
Paper
bibtex
@inproceedings{zheng_identifying_2015, title = {Identifying {Key} {Concepts} from {EHR} {Notes} {Using} {Domain} {Adaptation}}, url = {http://www.anthology.aclweb.org/W/W15/W15-26.pdf#page=127}, urldate = {2017-02-23}, booktitle = {{SIXTH} {INTERNATIONAL} {WORKSHOP} {ON} {HEALTH} {TEXT} {MINING} {AND} {INFORMATION} {ANALYSIS} ({LOUHI})}, author = {Zheng, Jiaping and Yu, Hong and Bedford, M. A.}, year = {2015}, pages = {115}, }
Improving Concept Identification for linking EHR notes to education materials.
Zheng, J; and Yu, H.
In Empirical Methods in Natural Language Processing, Lisbon, Portugal, 2015.
bibtex
@inproceedings{zheng_improving_2015, address = {Lisboa, Portugal}, title = {Improving {Concept} {Identification} for linking {EHR} notes to education materials.}, booktitle = {Empirical {Methods} in {Natural} {Language} {Processing}}, author = {Zheng, J and Yu, Hong}, year = {2015}, }
Towards Mining Electronic Health Records for Opioid ADE Surveillance.
Yu, H; Brandt, C; Becker, W; and Kem, R
In The 2015 HSR&D/QUERI National Conference, 2015.
Paper
bibtex
abstract
@inproceedings{yu_towards_2015, title = {Towards {Mining} {Electronic} {Health} {Records} for {Opioid} {ADE} {Surveillance}}, url = {http://www.hsrd.research.va.gov/meetings/2015/abstract-display.cfm?RecordID=200}, abstract = {Objectives: Prescription opioids are commonly used to treat acute and cancer-related pain, and, over the last two decades, have increasingly been used in the management of chronic non-cancer pain. Patients taking opioids can experience a wide range of adverse drug events (ADEs), including constipation, nausea/vomiting, pruritus, drowsiness and dizziness, hormonal dysfunction, depression, oversedation, falls, fractures, addiction, overdose, respiratory depression, sleep-disordered breathing, and death. Since such ADEs are frequently described in the unstructured electronic health record (EHR) notes, we are developing natural language processing (NLP) system to automatically extract opioid and ADEs from EHRs. The purpose of this study was to test out the feasibility of mining EHR notes for ADE detection using NLP approaches. Methods: We developed an annotation guideline using an interactive process during which physicians and linguists worked together to define rules and resolve discrepancy. Following the guideline, two annotators annotated 150 discharge summaries (or 8,672 sentences comprising 102,807 word tokens). The overall pairwise annotation agreement was 88\%. The total number of annotated ADEs and medications were 103 and 3,290. Using this annotated corpus, we developed a NLP system to detect medication and ADE information. Our NLP system is trained on the supervised machine learning model Conditional Random Fields. We compared our NLP system with the state-of-the-art NLP system the MetaMap for ADE detection. Results: NLP performed well on discharge summaries on certain named entities, including frequency (92\% F1), route (89\% F1), dosage (87\% F1), and medication (84\% F1). Because the number of ADE instances is small, NLP performed poorly on ADE (24\% F1). MetaMap performed on average 62\% F1 for medication and 4\% F1 for ADE. Implications: Our NLP system outperformed MetaMap for EHR notes ADE detection. NLP generally performs well with a sufficient size of annotated data. While the performance of ADE detection is low, more annotated data yielding a higher prevalence of ADEs would likely improve opioid ADE detection. Use of larger datasets is underway. Impacts: NLP has the potential to improve understanding of the nature and prevalence of opioid ADEs and, ultimately, advance the field of medication safety.}, booktitle = {The 2015 {HSR}\&{D}/{QUERI} {National} {Conference}}, author = {Yu, H and Brandt, C and Becker, W and Kem, R}, year = {2015}, }
Objectives: Prescription opioids are commonly used to treat acute and cancer-related pain, and, over the last two decades, have increasingly been used in the management of chronic non-cancer pain. Patients taking opioids can experience a wide range of adverse drug events (ADEs), including constipation, nausea/vomiting, pruritus, drowsiness and dizziness, hormonal dysfunction, depression, oversedation, falls, fractures, addiction, overdose, respiratory depression, sleep-disordered breathing, and death. Since such ADEs are frequently described in the unstructured electronic health record (EHR) notes, we are developing a natural language processing (NLP) system to automatically extract opioid and ADE information from EHRs. The purpose of this study was to test the feasibility of mining EHR notes for ADE detection using NLP approaches. Methods: We developed an annotation guideline using an interactive process during which physicians and linguists worked together to define rules and resolve discrepancies. Following the guideline, two annotators annotated 150 discharge summaries (or 8,672 sentences comprising 102,807 word tokens). The overall pairwise annotation agreement was 88%. The total numbers of annotated ADEs and medications were 103 and 3,290. Using this annotated corpus, we developed an NLP system to detect medication and ADE information. Our NLP system is trained using conditional random fields, a supervised machine learning model. We compared our NLP system with MetaMap, a state-of-the-art NLP system, for ADE detection. Results: NLP performed well on discharge summaries for certain named entities, including frequency (92% F1), route (89% F1), dosage (87% F1), and medication (84% F1). Because the number of ADE instances is small, NLP performed poorly on ADE (24% F1). MetaMap performed at an average of 62% F1 for medication and 4% F1 for ADE. Implications: Our NLP system outperformed MetaMap for ADE detection in EHR notes. NLP generally performs well with a sufficient size of annotated data. While the performance of ADE detection is low, more annotated data yielding a higher prevalence of ADEs would likely improve opioid ADE detection. Use of larger datasets is underway. Impacts: NLP has the potential to improve understanding of the nature and prevalence of opioid ADEs and, ultimately, advance the field of medication safety.
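The F1 scores reported above are exact-span, entity-level scores. A small helper showing how such scores are typically computed; the spans and types are hypothetical, not the authors' code:

def entity_f1(gold, pred):
    """Exact-match precision/recall/F1 over (start, end, type) spans."""
    gold, pred = set(gold), set(pred)
    tp = len(gold & pred)
    p = tp / len(pred) if pred else 0.0
    r = tp / len(gold) if gold else 0.0
    f1 = 2 * p * r / (p + r) if p + r else 0.0
    return p, r, f1

gold = {(0, 2, "Medication"), (10, 12, "ADE")}
pred = {(0, 2, "Medication"), (5, 6, "ADE")}
print(entity_f1(gold, pred))  # (0.5, 0.5, 0.5)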
Learning to rank scientific articles.
Lingeman, J.; and Yu, H.
In AMIA Fall Symposium, 2015.
Paper
bibtex
@inproceedings{lingerman_learning_2015, title = {Learning to rank scientific articles.}, url = {https://arxiv.org/pdf/1611.01400.pdf}, booktitle = {{AMIA} {Fall} {Symposium}}, author = {Lingerman, J and Hong, Yu}, year = {2015}, }
2014
(4)
Learning to Rank Figures within a Biomedical Article.
Liu, F.; and Yu, H.
PLoS ONE, 9(3): e61567. March 2014.
Paper
doi
bibtex
abstract
@article{liu_learning_2014, title = {Learning to {Rank} {Figures} within a {Biomedical} {Article}}, volume = {9}, issn = {1932-6203}, url = {http://dx.plos.org/10.1371/journal.pone.0061567}, doi = {10.1371/journal.pone.0061567}, abstract = {Hundreds of millions of figures are available in biomedical literature, representing important biomedical experimental evidence. This ever-increasing sheer volume has made it difficult for scientists to effectively and accurately access figures of their interest, the process of which is crucial for validating research facts and for formulating or testing novel research hypotheses. Current figure search applications can't fully meet this challenge as the "bag of figures" assumption doesn't take into account the relationship among figures. In our previous study, hundreds of biomedical researchers have annotated articles in which they serve as corresponding authors. They ranked each figure in their paper based on a figure's importance at their discretion, referred to as "figure ranking". Using this collection of annotated data, we investigated computational approaches to automatically rank figures. We exploited and extended the state-of-the-art listwise learning-to-rank algorithms and developed a new supervised-learning model BioFigRank. The cross-validation results show that BioFigRank yielded the best performance compared with other state-of-the-art computational models, and the greedy feature selection can further boost the ranking performance significantly. Furthermore, we carry out the evaluation by comparing BioFigRank with three-level competitive domain-specific human experts: (1) First Author, (2) Non-Author-In-Domain-Expert who is not the author nor co-author of an article but who works in the same field of the corresponding author of the article, and (3) Non-Author-Out-Domain-Expert who is not the author nor co-author of an article and who may or may not work in the same field of the corresponding author of an article. Our results show that BioFigRank outperforms Non-Author-Out-Domain-Expert and performs as well as Non-Author-In-Domain-Expert. Although BioFigRank underperforms First Author, since most biomedical researchers are either in- or out-domain-experts for an article, we conclude that BioFigRank represents an artificial intelligence system that offers expert-level intelligence to help biomedical researchers to navigate increasingly proliferated big data efficiently.}, language = {en}, number = {3}, urldate = {2015-02-26}, journal = {PLoS ONE}, author = {Liu, Feifan and Yu, Hong}, editor = {Preis, Tobias}, month = mar, year = {2014}, pmid = {24625719 PMCID: PMC3953065}, pages = {e61567}, }
Hundreds of millions of figures are available in biomedical literature, representing important biomedical experimental evidence. This ever-increasing sheer volume has made it difficult for scientists to effectively and accurately access figures of their interest, the process of which is crucial for validating research facts and for formulating or testing novel research hypotheses. Current figure search applications can't fully meet this challenge as the "bag of figures" assumption doesn't take into account the relationship among figures. In our previous study, hundreds of biomedical researchers have annotated articles in which they serve as corresponding authors. They ranked each figure in their paper based on a figure's importance at their discretion, referred to as "figure ranking". Using this collection of annotated data, we investigated computational approaches to automatically rank figures. We exploited and extended the state-of-the-art listwise learning-to-rank algorithms and developed a new supervised-learning model BioFigRank. The cross-validation results show that BioFigRank yielded the best performance compared with other state-of-the-art computational models, and the greedy feature selection can further boost the ranking performance significantly. Furthermore, we carry out the evaluation by comparing BioFigRank with three-level competitive domain-specific human experts: (1) First Author, (2) Non-Author-In-Domain-Expert who is not the author nor co-author of an article but who works in the same field of the corresponding author of the article, and (3) Non-Author-Out-Domain-Expert who is not the author nor co-author of an article and who may or may not work in the same field of the corresponding author of an article. Our results show that BioFigRank outperforms Non-Author-Out-Domain-Expert and performs as well as Non-Author-In-Domain-Expert. Although BioFigRank underperforms First Author, since most biomedical researchers are either in- or out-domain-experts for an article, we conclude that BioFigRank represents an artificial intelligence system that offers expert-level intelligence to help biomedical researchers to navigate increasingly proliferated big data efficiently.
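As context for the listwise learning-to-rank family that BioFigRank extends, here is a compact sketch of a ListNet-style top-one cross-entropy objective; it illustrates the class of methods, not BioFigRank itself:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

def listwise_loss(scores, relevance):
    """Cross-entropy between top-one distributions of model scores and
    ground-truth relevance for the figures of one article."""
    return -np.sum(softmax(relevance) * np.log(softmax(scores)))

# Toy scores and relevance grades for three figures of one article.
print(listwise_loss(np.array([2.0, 1.0, 0.1]), np.array([3.0, 1.0, 0.0])))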
Computational Approaches for Predicting Biomedical Research Collaborations.
Zhang, Q.; and Yu, H.
PLoS ONE, 9(11): e111795. November 2014.
Paper
doi
bibtex
abstract
@article{zhang_computational_2014, title = {Computational {Approaches} for {Predicting} {Biomedical} {Research} {Collaborations}}, volume = {9}, issn = {1932-6203}, url = {http://dx.plos.org/10.1371/journal.pone.0111795}, doi = {10.1371/journal.pone.0111795}, abstract = {Biomedical research is increasingly collaborative, and successful collaborations often produce high impact work. Computational approaches can be developed for automatically predicting biomedical research collaborations. Previous works of collaboration prediction mainly explored the topological structures of research collaboration networks, leaving out rich semantic information from the publications themselves. In this paper, we propose supervised machine learning approaches to predict research collaborations in the biomedical field. We explored both the semantic features extracted from author research interest profile and the author network topological features. We found that the most informative semantic features for author collaborations are related to research interest, including similarity of out-citing citations, similarity of abstracts. Of the four supervised machine learning models (naïve Bayes, naïve Bayes multinomial, SVMs, and logistic regression), the best performing model is logistic regression with an ROC ranging from 0.766 to 0.980 on different datasets. To our knowledge we are the first to study in depth how research interest and productivities can be used for collaboration prediction. Our approach is computationally efficient, scalable and yet simple to implement. The datasets of this study are available at https://github.com/qingzhanggithub/medline-collaboration-datasets.}, language = {en}, number = {11}, urldate = {2015-02-26}, journal = {PLoS ONE}, author = {Zhang, Qing and Yu, Hong}, editor = {Smalheiser, Neil R.}, month = nov, year = {2014}, pmid = {25375164 PMCID: PMC4222920}, pages = {e111795}, }
Biomedical research is increasingly collaborative, and successful collaborations often produce high impact work. Computational approaches can be developed for automatically predicting biomedical research collaborations. Previous works of collaboration prediction mainly explored the topological structures of research collaboration networks, leaving out rich semantic information from the publications themselves. In this paper, we propose supervised machine learning approaches to predict research collaborations in the biomedical field. We explored both the semantic features extracted from author research interest profile and the author network topological features. We found that the most informative semantic features for author collaborations are related to research interest, including similarity of out-citing citations, similarity of abstracts. Of the four supervised machine learning models (naïve Bayes, naïve Bayes multinomial, SVMs, and logistic regression), the best performing model is logistic regression with an ROC ranging from 0.766 to 0.980 on different datasets. To our knowledge we are the first to study in depth how research interest and productivities can be used for collaboration prediction. Our approach is computationally efficient, scalable and yet simple to implement. The datasets of this study are available at https://github.com/qingzhanggithub/medline-collaboration-datasets.
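A toy version of the best-performing setup reported above (logistic regression over pair-level similarity features); the features and data are synthetic stand-ins:

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
# Hypothetical features per author pair: [abstract similarity,
# citation similarity, shared coauthors]; label 1 = later collaboration.
X = rng.random((200, 3))
y = (X[:, 0] + X[:, 1] + rng.normal(0, 0.2, 200) > 1.0).astype(int)

clf = LogisticRegression().fit(X, y)
print(clf.predict_proba([[0.9, 0.8, 0.1]])[0, 1])  # P(collaboration)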
Automatically Recognizing Medication and Adverse Event Information From Food and Drug Administration’s Adverse Event Reporting System Narratives.
Polepalli Ramesh, B.; Belknap, S. M; Li, Z.; Frid, N.; West, D. P; and Yu, H.
JMIR Medical Informatics, 2(1): e10. June 2014.
Paper
doi
bibtex
@article{polepalli_ramesh_automatically_2014, title = {Automatically {Recognizing} {Medication} and {Adverse} {Event} {Information} {From} {Food} and {Drug} {Administration}’s {Adverse} {Event} {Reporting} {System} {Narratives}}, volume = {2}, issn = {2291-9694}, url = {http://medinform.jmir.org/2014/1/e10/}, doi = {10.2196/medinform.3022}, language = {en}, number = {1}, urldate = {2015-05-02}, journal = {JMIR Medical Informatics}, author = {Polepalli Ramesh, Balaji and Belknap, Steven M and Li, Zuofeng and Frid, Nadya and West, Dennis P and Yu, Hong}, month = jun, year = {2014}, pmid = {25600332}, pmcid = {PMC4288072}, pages = {e10}, }
A robust data-driven approach for gene ontology annotation.
Li, Y.; and Yu, H.
Database: The Journal of Biological Databases and Curation, 2014: bau113. 2014.
Paper
doi
bibtex
abstract
@article{li_robust_2014, title = {A robust data-driven approach for gene ontology annotation}, volume = {2014}, issn = {1758-0463}, url = {http://database.oxfordjournals.org/cgi/doi/10.1093/database/bau113}, doi = {10.1093/database/bau113}, abstract = {Gene ontology (GO) and GO annotation are important resources for biological information management and knowledge discovery, but the speed of manual annotation became a major bottleneck of database curation. BioCreative IV GO annotation task aims to evaluate the performance of system that automatically assigns GO terms to genes based on the narrative sentences in biomedical literature. This article presents our work in this task as well as the experimental results after the competition. For the evidence sentence extraction subtask, we built a binary classifier to identify evidence sentences using reference distance estimator (RDE), a recently proposed semi-supervised learning method that learns new features from around 10 million unlabeled sentences, achieving an F1 of 19.3\% in exact match and 32.5\% in relaxed match. In the post-submission experiment, we obtained 22.1\% and 35.7\% F1 performance by incorporating bigram features in RDE learning. In both development and test sets, RDE-based method achieved over 20\% relative improvement on F1 and AUC performance against classical supervised learning methods, e.g. support vector machine and logistic regression. For the GO term prediction subtask, we developed an information retrieval-based method to retrieve the GO term most relevant to each evidence sentence using a ranking function that combined cosine similarity and the frequency of GO terms in documents, and a filtering method based on high-level GO classes. The best performance of our submitted runs was 7.8\% F1 and 22.2\% hierarchy F1. We found that the incorporation of frequency information and hierarchy filtering substantially improved the performance. In the post-submission evaluation, we obtained a 10.6\% F1 using a simpler setting. Overall, the experimental analysis showed our approaches were robust in both the two tasks.}, language = {eng}, journal = {Database: The Journal of Biological Databases and Curation}, author = {Li, Yanpeng and Yu, Hong}, year = {2014}, pmid = {25425037}, pmcid = {PMC4243380}, note = {00000 }, pages = {bau113}, }
Gene ontology (GO) and GO annotation are important resources for biological information management and knowledge discovery, but the speed of manual annotation has become a major bottleneck of database curation. The BioCreative IV GO annotation task aims to evaluate the performance of systems that automatically assign GO terms to genes based on the narrative sentences in biomedical literature. This article presents our work in this task as well as the experimental results after the competition. For the evidence sentence extraction subtask, we built a binary classifier to identify evidence sentences using the reference distance estimator (RDE), a recently proposed semi-supervised learning method that learns new features from around 10 million unlabeled sentences, achieving an F1 of 19.3% in exact match and 32.5% in relaxed match. In the post-submission experiment, we obtained 22.1% and 35.7% F1 performance by incorporating bigram features in RDE learning. In both development and test sets, the RDE-based method achieved over 20% relative improvement in F1 and AUC performance against classical supervised learning methods, e.g. support vector machines and logistic regression. For the GO term prediction subtask, we developed an information retrieval-based method to retrieve the GO term most relevant to each evidence sentence using a ranking function that combined cosine similarity and the frequency of GO terms in documents, and a filtering method based on high-level GO classes. The best performance of our submitted runs was 7.8% F1 and 22.2% hierarchy F1. We found that the incorporation of frequency information and hierarchy filtering substantially improved the performance. In the post-submission evaluation, we obtained a 10.6% F1 using a simpler setting. Overall, the experimental analysis showed our approaches were robust in both tasks.
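The ranking function for the GO term prediction subtask, combining cosine similarity with GO term frequency, can be sketched roughly as follows; the terms, frequencies, and the 0.5 weight are invented for illustration:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

go_terms = ["protein binding", "DNA repair", "cell cycle regulation"]
freq = {"protein binding": 0.5, "DNA repair": 0.2,
        "cell cycle regulation": 0.3}  # hypothetical relative document frequency
sentence = ["the mutant shows a defect in repair of damaged DNA"]

vec = TfidfVectorizer().fit(go_terms + sentence)
sims = cosine_similarity(vec.transform(sentence), vec.transform(go_terms))[0]
# Score each GO term as cosine similarity plus weighted frequency.
ranked = sorted(zip(go_terms, sims), key=lambda t: t[1] + 0.5 * freq[t[0]],
                reverse=True)
print(ranked[0][0])  # -> "DNA repair"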
2013
(2)
Systems for Improving Electronic Health Record Note Comprehension.
Polepalli Ramesh, B.; and Yu, H.
In ACM SIGIR Workshop on Health Search & Discovery, 2013.
Paper
bibtex
abstract
@inproceedings{polepalli_ramesh_systems_2013, title = {Systems for {Improving} {Electronic} {Health} {Record} {Note} {Comprehension}}, url = {https://research.nuance.com/wp-content/uploads/2014/12/Systems-for-Improving-Electronic-Health-Record-Note-Comprehension.pdf}, abstract = {Allowing patients access to their physicians’ notes has the potential to enhance their understanding of disease and improve medication adherence and healthcare outcomes. However, a recent study involving over ten thousand patients showed that allowing patients to read their electronic health record (EHR) notes caused confusion, especially for the vulnerable (e.g., lower literacy, lower income) groups. This finding is not surprising as EHR notes contain medical jargon that may be difficult for patients to comprehend. To improve patients’ EHR note comprehension, we are developing a biomedical natural language processing system called NoteAid (http://clinicalnotesaid.org), which translates medical jargon into consumer-oriented lay language. The current NoteAid implementations link EHR medical terms to their definitions and other related educational material. Our evaluation has shown that all NoteAid implementations improve self-rated EHR note comprehension by 23\% to 40\% of lay people.}, booktitle = {{ACM} {SIGIR} {Workshop} on {Health} {Search} \& {Discovery}}, author = {Polepalli Ramesh, Balaji and Yu, Hong}, year = {2013}, }
Allowing patients access to their physicians’ notes has the potential to enhance their understanding of disease and improve medication adherence and healthcare outcomes. However, a recent study involving over ten thousand patients showed that allowing patients to read their electronic health record (EHR) notes caused confusion, especially for the vulnerable (e.g., lower literacy, lower income) groups. This finding is not surprising as EHR notes contain medical jargon that may be difficult for patients to comprehend. To improve patients’ EHR note comprehension, we are developing a biomedical natural language processing system called NoteAid (http://clinicalnotesaid.org), which translates medical jargon into consumer-oriented lay language. The current NoteAid implementations link EHR medical terms to their definitions and other related educational material. Our evaluation has shown that all NoteAid implementations improve self-rated EHR note comprehension by 23% to 40% of lay people.
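The core linking step NoteAid performs can be pictured as a dictionary lookup from medical jargon to lay definitions; the mapping below is invented, and the real system is far richer:

lay_dict = {  # hypothetical jargon -> lay definition mapping
    "hypertension": "high blood pressure",
    "myocardial infarction": "heart attack",
}

def link_terms(note):
    """Return the known jargon terms found in a note with lay definitions."""
    return {t: d for t, d in lay_dict.items() if t in note.lower()}

note = "Patient has a history of hypertension and prior myocardial infarction."
print(link_terms(note))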
CiteGraph: A Citation Network System for MEDLINE Articles and Analysis.
Qing, Z.; and Hong, Y.
Studies in Health Technology and Informatics, 192: 832–836. 2013.
Paper
doi
bibtex
abstract
@article{qing_citegraph:_2013, title = {{CiteGraph}: {A} {Citation} {Network} {System} for {MEDLINE} {Articles} and {Analysis}}, copyright = {©2013 © IMIA and IOS Press.}, issn = {0926-9630}, shorttitle = {{CiteGraph}}, url = {http://www.medra.org/servlet/aliasResolver?alias=iospressISSNISBN&issn=0926-9630&volume=192&spage=832}, doi = {10.3233/978-1-61499-289-9-832}, abstract = {This paper details the development and implementation of CiteGraph, a system for constructing large-scale citation and co-authorship networks from full-text biomedical articles. CiteGraph represents articles and authors by uniquely identified nodes, and connects those nodes through citation and co-authorship relations. CiteGraph network encompasses over 1.65 million full-text articles and 6.35 million citations by 1.37 million unique authors from the Elsevier full-text articles. Our evaluation shows 98\% 99\% F1-score for mapping a citation to the corresponding article and identifying MEDLINE articles. We further analyzed the characteristics of CiteGraph and found that they are consistent with assumptions made using small-scale bibliometric analysis. We also developed several novel network-based methods for analyzing publication, citation and collaboration patterns. This is the first work to develop a completely automated system for the creation of a large-scale citation network in the biomedical domain, and also to introduce novel findings in researcher publication histories. CiteGraph can be a useful resource to both the biomedical community, and bibliometric research.}, urldate = {2016-11-30}, journal = {Studies in Health Technology and Informatics}, author = {Qing, Zhang and Hong, Yu}, year = {2013}, pmid = {23920674}, pages = {832--836}, }
This paper details the development and implementation of CiteGraph, a system for constructing large-scale citation and co-authorship networks from full-text biomedical articles. CiteGraph represents articles and authors by uniquely identified nodes, and connects those nodes through citation and co-authorship relations. The CiteGraph network encompasses over 1.65 million full-text articles and 6.35 million citations by 1.37 million unique authors from the Elsevier full-text articles. Our evaluation shows 98%-99% F1-scores for mapping a citation to the corresponding article and identifying MEDLINE articles. We further analyzed the characteristics of CiteGraph and found that they are consistent with assumptions made using small-scale bibliometric analysis. We also developed several novel network-based methods for analyzing publication, citation, and collaboration patterns. This is the first work to develop a completely automated system for the creation of a large-scale citation network in the biomedical domain, and also to introduce novel findings in researcher publication histories. CiteGraph can be a useful resource to both the biomedical community and bibliometric research.
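A miniature of the citation-network construction that CiteGraph automates, using networkx with invented article identifiers and edges:

import networkx as nx

G = nx.DiGraph()
G.add_edges_from([                 # edge = citing -> cited article
    ("pmid:111", "pmid:222"),
    ("pmid:111", "pmid:333"),
    ("pmid:444", "pmid:222"),
])
in_cites = G.in_degree("pmid:222")  # times pmid:222 is cited
print(in_cites, nx.number_of_nodes(G), nx.number_of_edges(G))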
2012
(4)
Beyond Captions: Linking Figures with Abstract Sentences in Biomedical Articles.
Bockhorst, J. P.; Conroy, J. M.; Agarwal, S.; O’Leary, D. P.; and Yu, H.
PLoS ONE, 7(7): e39618. July 2012.
Paper
doi
bibtex
@article{bockhorst_beyond_2012, title = {Beyond {Captions}: {Linking} {Figures} with {Abstract} {Sentences} in {Biomedical} {Articles}}, volume = {7}, issn = {1932-6203}, shorttitle = {Beyond {Captions}}, url = {http://dx.plos.org/10.1371/journal.pone.0039618}, doi = {10.1371/journal.pone.0039618}, language = {en}, number = {7}, urldate = {2016-11-30}, journal = {PLoS ONE}, author = {Bockhorst, Joseph P. and Conroy, John M. and Agarwal, Shashank and O’Leary, Dianne P. and Yu, Hong}, editor = {Ouzounis, Christos A.}, month = jul, year = {2012}, pmid = {22815711}, pmcid = {PMC3399876}, pages = {e39618}, }
Automatic discourse connective detection in biomedical text.
Ramesh, B. P.; Prasad, R.; Miller, T.; Harrington, B.; and Yu, H.
Journal of the American Medical Informatics Association: JAMIA, 19(5): 800–808. October 2012.
doi bibtex abstract
@article{ramesh_automatic_2012, title = {Automatic discourse connective detection in biomedical text}, volume = {19}, issn = {1527-974X}, doi = {10.1136/amiajnl-2011-000775}, abstract = {OBJECTIVE Relation extraction in biomedical text mining systems has largely focused on identifying clause-level relations, but increasing sophistication demands the recognition of relations at discourse level. A first step in identifying discourse relations involves the detection of discourse connectives: words or phrases used in text to express discourse relations. In this study supervised machine-learning approaches were developed and evaluated for automatically identifying discourse connectives in biomedical text. MATERIALS AND METHODS Two supervised machine-learning models (support vector machines and conditional random fields) were explored for identifying discourse connectives in biomedical literature. In-domain supervised machine-learning classifiers were trained on the Biomedical Discourse Relation Bank, an annotated corpus of discourse relations over 24 full-text biomedical articles ({\textasciitilde}112,000 word tokens), a subset of the GENIA corpus. Novel domain adaptation techniques were also explored to leverage the larger open-domain Penn Discourse Treebank ({\textasciitilde}1 million word tokens). The models were evaluated using the standard evaluation metrics of precision, recall and F1 scores. RESULTS AND CONCLUSION Supervised machine-learning approaches can automatically identify discourse connectives in biomedical text, and the novel domain adaptation techniques yielded the best performance: 0.761 F1 score. A demonstration version of the fully implemented classifier BioConn is available at: http://bioconn.askhermes.org.}, number = {5}, journal = {Journal of the American Medical Informatics Association: JAMIA}, author = {Ramesh, Balaji Polepalli and Prasad, Rashmi and Miller, Tim and Harrington, Brian and Yu, Hong}, month = oct, year = {2012}, pmid = {22744958}, keywords = {Knowledge Bases, NLP, analysis, automated learning, controlled terminologies and vocabularies, discovery, display, image representation, knowledge acquisition and knowledge management, knowledge representations, natural language processing, ontologies, processing, text and data mining methods}, pages = {800--808}, }
OBJECTIVE Relation extraction in biomedical text mining systems has largely focused on identifying clause-level relations, but increasing sophistication demands the recognition of relations at discourse level. A first step in identifying discourse relations involves the detection of discourse connectives: words or phrases used in text to express discourse relations. In this study supervised machine-learning approaches were developed and evaluated for automatically identifying discourse connectives in biomedical text. MATERIALS AND METHODS Two supervised machine-learning models (support vector machines and conditional random fields) were explored for identifying discourse connectives in biomedical literature. In-domain supervised machine-learning classifiers were trained on the Biomedical Discourse Relation Bank, an annotated corpus of discourse relations over 24 full-text biomedical articles (~112,000 word tokens), a subset of the GENIA corpus. Novel domain adaptation techniques were also explored to leverage the larger open-domain Penn Discourse Treebank (~1 million word tokens). The models were evaluated using the standard evaluation metrics of precision, recall and F1 scores. RESULTS AND CONCLUSION Supervised machine-learning approaches can automatically identify discourse connectives in biomedical text, and the novel domain adaptation techniques yielded the best performance: 0.761 F1 score. A demonstration version of the fully implemented classifier BioConn is available at: http://bioconn.askhermes.org.
Natural Language Processing, Electronic Health Records, and Clinical Research.
Liu, F.; Weng, C.; and Yu, H.
In Clinical Research Informatics, pages 293–310. Springer London, 2012.
bibtex
@incollection{liu_natural_2012, title = {Natural {Language} {Processing}, {Electronic} {Health} {Records}, and {Clinical} {Research}}, booktitle = {Clinical {Research} {Informatics}}, publisher = {Springer London}, author = {Liu, Feifan and Weng, Chunhua and Yu, Hong}, year = {2012}, pages = {293--310}, }
MedTxting: learning based and knowledge rich SMS-style medical text contraction.
Liu, F.; Moosavinasab, S.; Houston, T. K.; and Yu, H.
AMIA ... Annual Symposium proceedings. AMIA Symposium, 2012: 558–567. 2012.
Paper
bibtex
abstract
@article{liu_medtxting:_2012, title = {{MedTxting}: learning based and knowledge rich {SMS}-style medical text contraction}, volume = {2012}, issn = {1942-597X}, shorttitle = {{MedTxting}}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3540574/}, abstract = {In mobile health (M-health), Short Message Service (SMS) has shown to improve disease related self-management and health service outcomes, leading to enhanced patient care. However, the hard limit on character size for each message limits the full value of exploring SMS communication in health care practices. To overcome this problem and improve the efficiency of clinical workflow, we developed an innovative system, MedTxting (available at http://medtxting.askhermes.org), which is a learning-based but knowledge-rich system that compresses medical texts in a SMS style. Evaluations on clinical questions and discharge summary narratives show that MedTxting can effectively compress medical texts with reasonable readability and noticeable size reduction. Findings in this work reveal potentials of MedTxting to the clinical settings, allowing for real-time and cost-effective communication, such as patient condition reporting, medication consulting, physicians connecting to share expertise to improve point of care.}, language = {ENG}, journal = {AMIA ... Annual Symposium proceedings. AMIA Symposium}, author = {Liu, Feifan and Moosavinasab, Soheil and Houston, Thomas K. and Yu, Hong}, year = {2012}, pmid = {23304328}, pmcid = {PMC3540574}, keywords = {Humans, Pilot Projects, Telemedicine, Text Messaging}, pages = {558--567}, }
In mobile health (M-health), Short Message Service (SMS) has shown to improve disease related self-management and health service outcomes, leading to enhanced patient care. However, the hard limit on character size for each message limits the full value of exploring SMS communication in health care practices. To overcome this problem and improve the efficiency of clinical workflow, we developed an innovative system, MedTxting (available at http://medtxting.askhermes.org), which is a learning-based but knowledge-rich system that compresses medical texts in a SMS style. Evaluations on clinical questions and discharge summary narratives show that MedTxting can effectively compress medical texts with reasonable readability and noticeable size reduction. Findings in this work reveal potentials of MedTxting to the clinical settings, allowing for real-time and cost-effective communication, such as patient condition reporting, medication consulting, physicians connecting to share expertise to improve point of care.
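To make the contraction idea concrete, here is a deliberately minimal Python sketch that applies a hand-written abbreviation dictionary; the mappings are hypothetical stand-ins for the learned and knowledge-based resources MedTxting actually combines.

# Toy SMS-style contraction via a hand-made abbreviation dictionary.
# The mappings below are hypothetical examples, not MedTxting's resources.
import re

ABBREVIATIONS = {
    "patient": "pt",
    "history": "hx",
    "treatment": "tx",
    "diagnosis": "dx",
    "with": "w/",
    "without": "w/o",
}

def contract(text):
    """Replace each known word with its SMS-style abbreviation."""
    def repl(match):
        word = match.group(0)
        return ABBREVIATIONS.get(word.lower(), word)
    return re.sub(r"[A-Za-z]+", repl, text)

print(contract("Patient with history of diabetes, started treatment yesterday."))
# -> pt w/ hx of diabetes, started tx yesterday.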
2011
(12)
BioN∅T: A searchable database of biomedical negated sentences.
Agarwal, S.; Yu, H.; and Kohane, I.
BMC Bioinformatics, 12(1): 420. 2011.
Paper
doi
bibtex
@article{agarwal_biont_2011, title = {{BioN}∅{T}: {A} searchable database of biomedical negated sentences}, volume = {12}, issn = {1471-2105}, shorttitle = {{BioN}∅{T}}, url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-420}, doi = {10.1186/1471-2105-12-420}, language = {en}, number = {1}, urldate = {2016-11-30}, journal = {BMC Bioinformatics}, author = {Agarwal, Shashank and Yu, Hong and Kohane, Isaac}, year = {2011}, pmid = {22032181}, pmcid = {PMC3225379}, pages = {420}, }
AskHERMES: An online question answering system for complex clinical questions.
Cao, Y.; Liu, F.; Simpson, P.; Antieau, L.; Bennett, A.; Cimino, J. J.; Ely, J.; and Yu, H.
Journal of Biomedical Informatics, 44(2): 277–288. April 2011.
Paper
doi
bibtex
abstract
@article{cao_askhermes_2011, title = {{AskHERMES}: {An} online question answering system for complex clinical questions}, volume = {44}, issn = {1532-0480}, shorttitle = {{AskHERMES}}, url = {http://www.ncbi.nlm.nih.gov/pubmed/21256977}, doi = {10.1016/j.jbi.2011.01.004}, abstract = {OBJECTIVE: Clinical questions are often long and complex and take many forms. We have built a clinical question answering system named AskHERMES to perform robust semantic analysis on complex clinical questions and output question-focused extractive summaries as answers. DESIGN: This paper describes the system architecture and a preliminary evaluation of AskHERMES, which implements innovative approaches in question analysis, summarization, and answer presentation. Five types of resources were indexed in this system: MEDLINE abstracts, PubMed Central full-text articles, eMedicine documents, clinical guidelines and Wikipedia articles. MEASUREMENT: We compared the AskHERMES system with Google (Google and Google Scholar) and UpToDate and asked physicians to score the three systems by ease of use, quality of answer, time spent, and overall performance. RESULTS: AskHERMES allows physicians to enter a question in a natural way with minimal query formulation and allows physicians to efficiently navigate among all the answer sentences to quickly meet their information needs. In contrast, physicians need to formulate queries to search for information in Google and UpToDate. The development of the AskHERMES system is still at an early stage, and the knowledge resource is limited compared with Google or UpToDate. Nevertheless, the evaluation results show that AskHERMES' performance is comparable to the other systems. In particular, when answering complex clinical questions, it demonstrates the potential to outperform both Google and UpToDate systems. CONCLUSIONS: AskHERMES, available at http://www.AskHERMES.org, has the potential to help physicians practice evidence-based medicine and improve the quality of patient care.}, number = {2}, urldate = {2011-03-25}, journal = {Journal of Biomedical Informatics}, author = {Cao, Yonggang and Liu, Feifan and Simpson, Pippa and Antieau, Lamont and Bennett, Andrew and Cimino, James J and Ely, John and Yu, Hong}, month = apr, year = {2011}, pmid = {21256977}, pmcid = {PMC3433744}, keywords = {Algorithms, Clinical Medicine, Databases, Factual, Information Storage and Retrieval, Online Systems, Software, expert systems, natural language processing}, pages = {277--288}, }
OBJECTIVE: Clinical questions are often long and complex and take many forms. We have built a clinical question answering system named AskHERMES to perform robust semantic analysis on complex clinical questions and output question-focused extractive summaries as answers. DESIGN: This paper describes the system architecture and a preliminary evaluation of AskHERMES, which implements innovative approaches in question analysis, summarization, and answer presentation. Five types of resources were indexed in this system: MEDLINE abstracts, PubMed Central full-text articles, eMedicine documents, clinical guidelines and Wikipedia articles. MEASUREMENT: We compared the AskHERMES system with Google (Google and Google Scholar) and UpToDate and asked physicians to score the three systems by ease of use, quality of answer, time spent, and overall performance. RESULTS: AskHERMES allows physicians to enter a question in a natural way with minimal query formulation and allows physicians to efficiently navigate among all the answer sentences to quickly meet their information needs. In contrast, physicians need to formulate queries to search for information in Google and UpToDate. The development of the AskHERMES system is still at an early stage, and the knowledge resource is limited compared with Google or UpToDate. Nevertheless, the evaluation results show that AskHERMES' performance is comparable to the other systems. In particular, when answering complex clinical questions, it demonstrates the potential to outperform both Google and UpToDate systems. CONCLUSIONS: AskHERMES, available at http://www.AskHERMES.org, has the potential to help physicians practice evidence-based medicine and improve the quality of patient care.
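The extractive, question-focused answering step can be approximated in a few lines of scikit-learn: rank candidate sentences by TF-IDF cosine similarity to the question and return the top hits. This is a minimal sketch over invented example data, not AskHERMES' actual question analysis or summarization pipeline.

# Minimal question-focused extractive ranking with TF-IDF similarity.
# The question and candidate sentences are invented examples.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

question = "What is the first-line treatment for community-acquired pneumonia?"
candidates = [
    "Amoxicillin is recommended as first-line therapy for community-acquired pneumonia.",
    "Atrial fibrillation increases the risk of stroke.",
    "Macrolides are an alternative for patients with penicillin allergy.",
]

vectorizer = TfidfVectorizer()
matrix = vectorizer.fit_transform([question] + candidates)
scores = cosine_similarity(matrix[0:1], matrix[1:]).ravel()

# Print the two best-scoring sentences as the extractive "answer".
for score, sentence in sorted(zip(scores, candidates), reverse=True)[:2]:
    print(f"{score:.2f}  {sentence}")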
Toward automated consumer question answering: Automatically separating consumer questions from professional questions in the healthcare domain.
Liu, F.; Antieau, L. D.; and Yu, H.
Journal of Biomedical Informatics, 44(6): 1032–1038. December 2011.
Paper
doi
bibtex
abstract
@article{liu_toward_2011, title = {Toward automated consumer question answering: {Automatically} separating consumer questions from professional questions in the healthcare domain}, volume = {44}, issn = {15320464}, shorttitle = {Toward automated consumer question answering}, url = {http://linkinghub.elsevier.com/retrieve/pii/S1532046411001353}, doi = {10.1016/j.jbi.2011.08.008}, abstract = {OBJECTIVE: Both healthcare professionals and healthcare consumers have information needs that can be met through the use of computers, specifically via medical question answering systems. However, the information needs of both groups are different in terms of literacy levels and technical expertise, and an effective question answering system must be able to account for these differences if it is to formulate the most relevant responses for users from each group. In this paper, we propose that a first step toward answering the queries of different users is automatically classifying questions according to whether they were asked by healthcare professionals or consumers. DESIGN: We obtained two sets of consumer questions ({\textasciitilde}10,000 questions in total) from Yahoo answers. The professional questions consist of two question collections: 4654 point-of-care questions (denoted as PointCare) obtained from interviews of a group of family doctors following patient visits and 5378 questions from physician practices through professional online services (denoted as OnlinePractice). With more than 20,000 questions combined, we developed supervised machine-learning models for automatic classification between consumer questions and professional questions. To evaluate the robustness of our models, we tested the model that was trained on the Consumer-PointCare dataset on the Consumer-OnlinePractice dataset. We evaluated both linguistic features and statistical features and examined how the characteristics in two different types of professional questions (PointCare vs. OnlinePractice) may affect the classification performance. We explored information gain for feature reduction and the back-off linguistic category features. RESULTS: The 10-fold cross-validation results showed the best F1-measure of 0.936 and 0.946 on Consumer-PointCare and Consumer-OnlinePractice respectively, and the best F1-measure of 0.891 when testing the Consumer-PointCare model on the Consumer-OnlinePractice dataset. CONCLUSION: Healthcare consumer questions posted at Yahoo online communities can be reliably classified from professional questions posted by point-of-care clinicians and online physicians. The supervised machine-learning models are robust for this task. Our study will significantly benefit further development in automated consumer question answering.}, language = {en}, number = {6}, urldate = {2016-11-30}, journal = {Journal of Biomedical Informatics}, author = {Liu, Feifan and Antieau, Lamont D. and Yu, Hong}, month = dec, year = {2011}, pmid = {21856442}, pmcid = {PMC3226885}, keywords = {Artificial Intelligence, Consumer Participation, Databases, Factual, Delivery of Health Care, Humans, Information Dissemination, Information Storage and Retrieval, Internet, Point-of-Care Systems, Semantics, natural language processing}, pages = {1032--1038}, }
OBJECTIVE: Both healthcare professionals and healthcare consumers have information needs that can be met through the use of computers, specifically via medical question answering systems. However, the information needs of both groups are different in terms of literacy levels and technical expertise, and an effective question answering system must be able to account for these differences if it is to formulate the most relevant responses for users from each group. In this paper, we propose that a first step toward answering the queries of different users is automatically classifying questions according to whether they were asked by healthcare professionals or consumers. DESIGN: We obtained two sets of consumer questions (~10,000 questions in total) from Yahoo answers. The professional questions consist of two question collections: 4654 point-of-care questions (denoted as PointCare) obtained from interviews of a group of family doctors following patient visits and 5378 questions from physician practices through professional online services (denoted as OnlinePractice). With more than 20,000 questions combined, we developed supervised machine-learning models for automatic classification between consumer questions and professional questions. To evaluate the robustness of our models, we tested the model that was trained on the Consumer-PointCare dataset on the Consumer-OnlinePractice dataset. We evaluated both linguistic features and statistical features and examined how the characteristics in two different types of professional questions (PointCare vs. OnlinePractice) may affect the classification performance. We explored information gain for feature reduction and the back-off linguistic category features. RESULTS: The 10-fold cross-validation results showed the best F1-measure of 0.936 and 0.946 on Consumer-PointCare and Consumer-OnlinePractice respectively, and the best F1-measure of 0.891 when testing the Consumer-PointCare model on the Consumer-OnlinePractice dataset. CONCLUSION: Healthcare consumer questions posted at Yahoo online communities can be reliably classified from professional questions posted by point-of-care clinicians and online physicians. The supervised machine-learning models are robust for this task. Our study will significantly benefit further development in automated consumer question answering.
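The classification task itself reduces to standard supervised text classification. The sketch below, with invented toy questions in place of the paper's ~20,000 Yahoo! Answers and point-of-care questions, shows the shape of such a pipeline in scikit-learn.

# Consumer-vs-professional question classification as a TF-IDF + linear
# SVM pipeline. The four training questions are invented placeholders.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

questions = [
    "Why does my knee hurt when I climb stairs?",                  # consumer
    "Is my headache from staring at a screen all day?",            # consumer
    "What is the recommended apixaban dose in renal impairment?",  # professional
    "Which second-line agents follow metformin failure in T2DM?",  # professional
]
labels = ["consumer", "consumer", "professional", "professional"]

clf = make_pipeline(TfidfVectorizer(ngram_range=(1, 2)), LinearSVC())
clf.fit(questions, labels)
print(clf.predict(["What antibiotic covers MRSA in penicillin-allergic patients?"]))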
Simple and efficient machine learning frameworks for identifying protein-protein interaction relevant articles and experimental methods used to study the interactions.
Agarwal, S.; Liu, F.; and Yu, H.
BMC Bioinformatics, 12(Suppl 8): S10. 2011.
Paper
doi
bibtex
abstract
@article{agarwal_simple_2011, title = {Simple and efficient machine learning frameworks for identifying protein-protein interaction relevant articles and experimental methods used to study the interactions}, volume = {12}, issn = {1471-2105}, url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-S8-S10}, doi = {10.1186/1471-2105-12-S8-S10}, abstract = {BACKGROUND: Protein-protein interaction (PPI) is an important biomedical phenomenon. Automatically detecting PPI-relevant articles and identifying methods that are used to study PPI are important text mining tasks. In this study, we have explored domain independent features to develop two open source machine learning frameworks. One performs binary classification to determine whether the given article is PPI relevant or not, named "Simple Classifier", and the other one maps the PPI relevant articles with corresponding interaction method nodes in a standardized PSI-MI (Proteomics Standards Initiative-Molecular Interactions) ontology, named "OntoNorm". RESULTS: We evaluated our system in the context of BioCreative challenge competition using the standardized data set. Our systems are amongst the top systems reported by the organizers, attaining 60.8\% F1-score for identifying relevant documents, and 52.3\% F1-score for mapping articles to interaction method ontology. CONCLUSION: Our results show that domain-independent machine learning frameworks can perform competitively well at the tasks of detecting PPI relevant articles and identifying the methods that were used to study the interaction in such articles.}, language = {en}, number = {Suppl 8}, urldate = {2016-11-30}, journal = {BMC Bioinformatics}, author = {Agarwal, Shashank and Liu, Feifan and Yu, Hong}, year = {2011}, pmid = {22151701}, pmcid = {PMC3269933}, pages = {S10}, }
BACKGROUND: Protein-protein interaction (PPI) is an important biomedical phenomenon. Automatically detecting PPI-relevant articles and identifying methods that are used to study PPI are important text mining tasks. In this study, we have explored domain independent features to develop two open source machine learning frameworks. One performs binary classification to determine whether the given article is PPI relevant or not, named "Simple Classifier", and the other one maps the PPI relevant articles with corresponding interaction method nodes in a standardized PSI-MI (Proteomics Standards Initiative-Molecular Interactions) ontology, named "OntoNorm". RESULTS: We evaluated our system in the context of BioCreative challenge competition using the standardized data set. Our systems are amongst the top systems reported by the organizers, attaining 60.8% F1-score for identifying relevant documents, and 52.3% F1-score for mapping articles to interaction method ontology. CONCLUSION: Our results show that domain-independent machine learning frameworks can perform competitively well at the tasks of detecting PPI relevant articles and identifying the methods that were used to study the interaction in such articles.
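Of the two frameworks the abstract names, the ontology-mapping step can be pictured as multi-class text classification over PSI-MI method labels; the relevance filter ("Simple Classifier") would be the analogous binary pipeline. The labels and training snippets below are invented placeholders, not the BioCreative data.

# Mapping article text to PSI-MI interaction-detection-method labels,
# sketched as multi-class classification. Data and labels are invented.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

snippets = [
    "We confirmed the interaction by co-immunoprecipitation.",
    "A yeast two-hybrid screen identified novel binding partners.",
    "Pull-down assays showed direct binding of the two proteins.",
]
methods = ["MI:0019 coip", "MI:0018 two hybrid", "MI:0096 pull down"]

mapper = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=1000))
mapper.fit(snippets, methods)
print(mapper.predict(["Binding was detected by anti-tag co-immunoprecipitation."]))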
Parsing citations in biomedical articles using conditional random fields.
Zhang, Q.; Cao, Y.; and Yu, H.
Computers in Biology and Medicine, 41(4): 190–194. April 2011.
Paper
doi
bibtex
abstract
@article{zhang_parsing_2011, title = {Parsing citations in biomedical articles using conditional random fields}, volume = {41}, issn = {00104825}, url = {http://linkinghub.elsevier.com/retrieve/pii/S0010482511000291}, doi = {10.1016/j.compbiomed.2011.02.005}, abstract = {Citations are used ubiquitously in biomedical full-text articles and play an important role for representing both the rhetorical structure and the semantic content of the articles. As a result, text mining systems will significantly benefit from a tool that automatically extracts the content of a citation. In this study, we applied the supervised machine-learning algorithms Conditional Random Fields (CRFs) to automatically parse a citation into its fields (e.g., Author, Title, Journal, and Year). With a subset of html format open-access PubMed Central articles, we report an overall 97.95\% F1-score. The citation parser can be accessed at: http://www.cs.uwm.edu/∼qing/projects/cithit/index.html.}, language = {en}, number = {4}, urldate = {2016-11-30}, journal = {Computers in Biology and Medicine}, author = {Zhang, Qing and Cao, Yong-Gang and Yu, Hong}, month = apr, year = {2011}, pmid = {21419403}, pmcid = {PMC3086470}, pages = {190--194}, }
Citations are used ubiquitously in biomedical full-text articles and play an important role for representing both the rhetorical structure and the semantic content of the articles. As a result, text mining systems will significantly benefit from a tool that automatically extracts the content of a citation. In this study, we applied the supervised machine-learning algorithms Conditional Random Fields (CRFs) to automatically parse a citation into its fields (e.g., Author, Title, Journal, and Year). With a subset of html format open-access PubMed Central articles, we report an overall 97.95% F1-score. The citation parser can be accessed at: http://www.cs.uwm.edu/∼qing/projects/cithit/index.html.
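A sequence-labeling sketch of this approach, assuming the third-party sklearn-crfsuite package (the paper does not specify an implementation at this level): each citation token gets a feature dictionary, and a CRF assigns field tags. The single training citation and the feature template are invented toy examples.

# Citation parsing as CRF sequence labeling with sklearn-crfsuite.
# The training citation, tags, and features are toy examples.
import sklearn_crfsuite

def features(tokens, i):
    """Surface features for the token at position i."""
    return {
        "word": tokens[i].lower(),
        "is_digit": tokens[i].isdigit(),
        "is_title": tokens[i].istitle(),
        "rel_position": i / len(tokens),
    }

tokens = ["Zhang", "Q", "Parsing", "citations", "Comput", "Biol", "Med", "2011"]
tags = ["AUTHOR", "AUTHOR", "TITLE", "TITLE", "JOURNAL", "JOURNAL", "JOURNAL", "YEAR"]

crf = sklearn_crfsuite.CRF(algorithm="lbfgs", max_iterations=50)
crf.fit([[features(tokens, i) for i in range(len(tokens))]], [tags])

test = ["Kim", "D", "Figure", "text", "extraction", "PLoS", "ONE", "2011"]
print(crf.predict([[features(test, i) for i in range(len(test))]]))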
Figure Text Extraction in Biomedical Literature.
Kim, D.; and Yu, H.
PLoS ONE, 6(1): e15338. January 2011.
Paper
doi
bibtex
@article{kim_figure_2011, title = {Figure {Text} {Extraction} in {Biomedical} {Literature}}, volume = {6}, issn = {1932-6203}, url = {http://dx.plos.org/10.1371/journal.pone.0015338}, doi = {10.1371/journal.pone.0015338}, language = {en}, number = {1}, urldate = {2016-11-30}, journal = {PLoS ONE}, author = {Kim, Daehyun and Yu, Hong}, editor = {Uversky, Vladimir N.}, month = jan, year = {2011}, pmid = {21249186}, pmcid = {PMC3020938}, pages = {e15338}, }
Automatic figure classification in bioscience literature.
Kim, D.; Ramesh, B. P.; and Yu, H.
Journal of Biomedical Informatics, 44(5): 848–858. October 2011.
Paper
doi
bibtex
@article{kim_automatic_2011, title = {Automatic figure classification in bioscience literature}, volume = {44}, issn = {15320464}, url = {http://linkinghub.elsevier.com/retrieve/pii/S1532046411000943}, doi = {10.1016/j.jbi.2011.05.003}, language = {en}, number = {5}, urldate = {2016-11-30}, journal = {Journal of Biomedical Informatics}, author = {Kim, Daehyun and Ramesh, Balaji Polepalli and Yu, Hong}, month = oct, year = {2011}, pmid = {21645638}, pmcid = {PMC3176927}, pages = {848--858}, }
An investigation into the feasibility of spoken clinical question answering.
Miller, T.; Ravvaz, K.; Cimino, J. J.; and Yu, H.
AMIA ... Annual Symposium proceedings. AMIA Symposium, 2011: 954–959. 2011.
Paper
bibtex
abstract
@article{miller_investigation_2011, title = {An investigation into the feasibility of spoken clinical question answering}, volume = {2011}, issn = {1942-597X}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3243288/}, abstract = {Spoken question answering for clinical decision support is a potentially revolutionary technology for improving the efficiency and quality of health care delivery. This application involves many technologies currently being researched, including automatic speech recognition (ASR), information retrieval (IR), and summarization, all in the biomedical domain. In certain domains, the problem of spoken document retrieval has been declared solved because of the robustness of IR to ASR errors. This study investigates the extent to which spoken medical question answering benefits from that same robustness. We used the best results from previous speech recognition experiments as inputs to a clinical question answering system, and had physicians perform blind evaluations of results generated both by ASR transcripts of questions and gold standard transcripts of the same questions. Our results suggest that the medical domain differs enough from the open domain to require additional work in automatic speech recognition adapted for the biomedical domain.}, language = {ENG}, journal = {AMIA ... Annual Symposium proceedings. AMIA Symposium}, author = {Miller, Tim and Ravvaz, Kourosh and Cimino, James J. and Yu, Hong}, year = {2011}, pmid = {22195154}, pmcid = {PMC3243288}, keywords = {Decision Support Systems, Clinical, Feasibility Studies, Humans, Information Storage and Retrieval, Speech Recognition Software, natural language processing}, pages = {954--959}, }
Spoken question answering for clinical decision support is a potentially revolutionary technology for improving the efficiency and quality of health care delivery. This application involves many technologies currently being researched, including automatic speech recognition (ASR), information retrieval (IR), and summarization, all in the biomedical domain. In certain domains, the problem of spoken document retrieval has been declared solved because of the robustness of IR to ASR errors. This study investigates the extent to which spoken medical question answering benefits from that same robustness. We used the best results from previous speech recognition experiments as inputs to a clinical question answering system, and had physicians perform blind evaluations of results generated both by ASR transcripts of questions and gold standard transcripts of the same questions. Our results suggest that the medical domain differs enough from the open domain to require additional work in automatic speech recognition adapted for the biomedical domain.
Apixaban versus warfarin in patients with atrial fibrillation.
Granger, C. B.; Alexander, J. H.; McMurray, J. J. V.; Lopes, R. D.; Hylek, E. M.; Hanna, M.; Al-Khalidi, H. R.; Ansell, J.; Atar, D.; Avezum, A.; Bahit, M. C.; Diaz, R.; Easton, J. D.; Ezekowitz, J. A.; Flaker, G.; Garcia, D.; Geraldes, M.; Gersh, B. J.; Golitsyn, S.; Goto, S.; Hermosillo, A. G.; Hohnloser, S. H.; Horowitz, J.; Mohan, P.; Jansky, P.; Lewis, B. S.; Lopez-Sendon, J. L.; Pais, P.; Parkhomenko, A.; Verheugt, F. W. A.; Zhu, J.; Wallentin, L.; and the ARISTOTLE Committees and Investigators
The New England Journal of Medicine, 365(11): 981–992. September 2011.
Paper
doi
bibtex
abstract
@article{granger_apixaban_2011, title = {Apixaban versus warfarin in patients with atrial fibrillation}, volume = {365}, issn = {1533-4406}, url = {http://www.nejm.org/doi/full/10.1056/NEJMoa1107039}, doi = {10.1056/NEJMoa1107039}, abstract = {BACKGROUND: Vitamin K antagonists are highly effective in preventing stroke in patients with atrial fibrillation but have several limitations. Apixaban is a novel oral direct factor Xa inhibitor that has been shown to reduce the risk of stroke in a similar population in comparison with aspirin. METHODS: In this randomized, double-blind trial, we compared apixaban (at a dose of 5 mg twice daily) with warfarin (target international normalized ratio, 2.0 to 3.0) in 18,201 patients with atrial fibrillation and at least one additional risk factor for stroke. The primary outcome was ischemic or hemorrhagic stroke or systemic embolism. The trial was designed to test for noninferiority, with key secondary objectives of testing for superiority with respect to the primary outcome and to the rates of major bleeding and death from any cause. RESULTS: The median duration of follow-up was 1.8 years. The rate of the primary outcome was 1.27\% per year in the apixaban group, as compared with 1.60\% per year in the warfarin group (hazard ratio with apixaban, 0.79; 95\% confidence interval [CI], 0.66 to 0.95; P{\textless}0.001 for noninferiority; P=0.01 for superiority). The rate of major bleeding was 2.13\% per year in the apixaban group, as compared with 3.09\% per year in the warfarin group (hazard ratio, 0.69; 95\% CI, 0.60 to 0.80; P{\textless}0.001), and the rates of death from any cause were 3.52\% and 3.94\%, respectively (hazard ratio, 0.89; 95\% CI, 0.80 to 0.99; P=0.047). The rate of hemorrhagic stroke was 0.24\% per year in the apixaban group, as compared with 0.47\% per year in the warfarin group (hazard ratio, 0.51; 95\% CI, 0.35 to 0.75; P{\textless}0.001), and the rate of ischemic or uncertain type of stroke was 0.97\% per year in the apixaban group and 1.05\% per year in the warfarin group (hazard ratio, 0.92; 95\% CI, 0.74 to 1.13; P=0.42). CONCLUSIONS: In patients with atrial fibrillation, apixaban was superior to warfarin in preventing stroke or systemic embolism, caused less bleeding, and resulted in lower mortality. (Funded by Bristol-Myers Squibb and Pfizer; ARISTOTLE ClinicalTrials.gov number, NCT00412984.).}, language = {eng}, number = {11}, journal = {The New England Journal of Medicine}, author = {Granger, Christopher B. and Alexander, John H. and McMurray, John J. V. and Lopes, Renato D. and Hylek, Elaine M. and Hanna, Michael and Al-Khalidi, Hussein R. and Ansell, Jack and Atar, Dan and Avezum, Alvaro and Bahit, M. Cecilia and Diaz, Rafael and Easton, J. Donald and Ezekowitz, Justin A. and Flaker, Greg and Garcia, David and Geraldes, Margarida and Gersh, Bernard J. and Golitsyn, Sergey and Goto, Shinya and Hermosillo, Antonio G. and Hohnloser, Stefan H. and Horowitz, John and Mohan, Puneet and Jansky, Petr and Lewis, Basil S. and Lopez-Sendon, Jose Luis and Pais, Prem and Parkhomenko, Alexander and Verheugt, Freek W. A. and Zhu, Jun and Wallentin, Lars and {ARISTOTLE Committees and Investigators}}, month = sep, year = {2011}, pmid = {21870978}, keywords = {Aged, Anticoagulants, Atrial Fibrillation, Double-Blind Method, Factor Xa Inhibitors, Female, Follow-Up Studies, Hemorrhage, Humans, International Normalized Ratio, Kaplan-Meier Estimate, Male, Middle Aged, Pyrazoles, Pyridones, Stroke, Thromboembolism, Treatment Outcome, Warfarin}, pages = {981--992}, }
BACKGROUND: Vitamin K antagonists are highly effective in preventing stroke in patients with atrial fibrillation but have several limitations. Apixaban is a novel oral direct factor Xa inhibitor that has been shown to reduce the risk of stroke in a similar population in comparison with aspirin. METHODS: In this randomized, double-blind trial, we compared apixaban (at a dose of 5 mg twice daily) with warfarin (target international normalized ratio, 2.0 to 3.0) in 18,201 patients with atrial fibrillation and at least one additional risk factor for stroke. The primary outcome was ischemic or hemorrhagic stroke or systemic embolism. The trial was designed to test for noninferiority, with key secondary objectives of testing for superiority with respect to the primary outcome and to the rates of major bleeding and death from any cause. RESULTS: The median duration of follow-up was 1.8 years. The rate of the primary outcome was 1.27% per year in the apixaban group, as compared with 1.60% per year in the warfarin group (hazard ratio with apixaban, 0.79; 95% confidence interval [CI], 0.66 to 0.95; P<0.001 for noninferiority; P=0.01 for superiority). The rate of major bleeding was 2.13% per year in the apixaban group, as compared with 3.09% per year in the warfarin group (hazard ratio, 0.69; 95% CI, 0.60 to 0.80; P<0.001), and the rates of death from any cause were 3.52% and 3.94%, respectively (hazard ratio, 0.89; 95% CI, 0.80 to 0.99; P=0.047). The rate of hemorrhagic stroke was 0.24% per year in the apixaban group, as compared with 0.47% per year in the warfarin group (hazard ratio, 0.51; 95% CI, 0.35 to 0.75; P<0.001), and the rate of ischemic or uncertain type of stroke was 0.97% per year in the apixaban group and 1.05% per year in the warfarin group (hazard ratio, 0.92; 95% CI, 0.74 to 1.13; P=0.42). CONCLUSIONS: In patients with atrial fibrillation, apixaban was superior to warfarin in preventing stroke or systemic embolism, caused less bleeding, and resulted in lower mortality. (Funded by Bristol-Myers Squibb and Pfizer; ARISTOTLE ClinicalTrials.gov number, NCT00412984.).
Figure summarizer browser extensions for PubMed Central.
Agarwal, S.; and Yu, H.
Bioinformatics, 27(12): 1723–1724. June 2011.
Paper
doi
bibtex
@article{agarwal_figure_2011, title = {Figure summarizer browser extensions for {PubMed} {Central}}, volume = {27}, issn = {1367-4803, 1460-2059}, url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btr194}, doi = {10.1093/bioinformatics/btr194}, language = {en}, number = {12}, urldate = {2016-11-30}, journal = {Bioinformatics}, author = {Agarwal, S. and Yu, H.}, month = jun, year = {2011}, pages = {1723--1724}, }
The biomedical discourse relation bank.
Prasad, R.; McRoy, S.; Frid, N.; Joshi, A.; and Yu, H.
BMC Bioinformatics, 12(1): 188. May 2011.
Paper
doi
bibtex
abstract
@article{prasad_biomedical_2011, title = {The biomedical discourse relation bank}, volume = {12}, copyright = {2011 Prasad et al; licensee BioMed Central Ltd.}, issn = {1471-2105}, url = {http://www.biomedcentral.com/1471-2105/12/188/abstract}, doi = {10.1186/1471-2105-12-188}, abstract = {Identification of discourse relations, such as causal and contrastive relations, between situations mentioned in text is an important task for biomedical text-mining. A biomedical text corpus annotated with discourse relations would be very useful for developing and evaluating methods for biomedical discourse processing. However, little effort has been made to develop such an annotated resource.}, language = {en}, number = {1}, urldate = {2013-05-23}, journal = {BMC Bioinformatics}, author = {Prasad, Rashmi and McRoy, Susan and Frid, Nadya and Joshi, Aravind and Yu, Hong}, month = may, year = {2011}, pmid = {21605399}, pages = {188}, }
Identification of discourse relations, such as causal and contrastive relations, between situations mentioned in text is an important task for biomedical text-mining. A biomedical text corpus annotated with discourse relations would be very useful for developing and evaluating methods for biomedical discourse processing. However, little effort has been made to develop such an annotated resource.