This site has been permanently archived. This is a static copy provided by the University of Southampton.
% CogPrints record 3732 (Turney, 2004): description of the NRC word sense
% disambiguation system for the Senseval-3 English Lexical Sample task.
% NOTE(review): the standard styles do not print "pages" for misc entries;
% the page range suggests a proceedings paper -- confirm the venue and
% consider switching to inproceedings with a booktitle.
@misc{cogprints3732,
  author   = {Turney, Peter D.},
  title    = {Word Sense Disambiguation by Web Mining for Word Co-occurrence Probabilities},
  year     = {2004},
  pages    = {239--242},
  url      = {http://cogprints.org/3732/},
  abstract = {This paper describes the National Research Council (NRC)
Word Sense Disambiguation (WSD) system, as applied to the
English Lexical Sample (ELS) task in Senseval-3. The NRC system
approaches WSD as a classical supervised machine learning problem,
using familiar tools such as the Weka machine learning software
and Brill's rule-based part-of-speech tagger. Head words are
represented as feature vectors with several hundred features.
Approximately half of the features are syntactic and the other
half are semantic. The main novelty in the system is the method for
generating the semantic features, based on word co-occurrence
probabilities. The probabilities are estimated using
the Waterloo MultiText System with a corpus of about one terabyte of
unlabeled text, collected by a web crawler.},
}