This site has been permanently archived. This is a static copy provided by the University of Southampton.
@comment{NOTE(review): entry retyped from misc to inproceedings, because the
  standard styles do not print a venue or page range for misc entries.
  The venue name reads like a workshop proceedings or collection title;
  confirm the exact proceedings title against the publisher record.}
@inproceedings{cogprints7642,
  author    = {Ferrara, Emilio and Baumgartner, Robert},
  title     = {Automatic Wrapper Adaptation by Tree Edit Distance Matching},
  booktitle = {Combinations of Intelligent Methods and Applications},
  pages     = {17--23},
  year      = {2010},
  url       = {http://cogprints.org/7642/},
  abstract  = {Information distributed through the Web keeps growing faster day by day,
    and for this reason, several techniques for extracting Web data have been suggested
    during last years. Often, extraction tasks are performed through so called wrappers,
    procedures extracting information from Web pages, e.g. implementing logic-based
    techniques. Many fields of application today require a strong degree of robustness
    of wrappers, in order not to compromise assets of information or reliability of data
    extracted.
    Unfortunately, wrappers may fail in the task of extracting data from a Web page, if
    its structure changes, sometimes even slightly, thus requiring the exploiting of new
    techniques to be automatically held so as to adapt the wrapper to the new structure
    of the page, in case of failure. In this work we present a novel approach of
    automatic wrapper adaptation based on the measurement of similarity of trees through
    improved tree edit distance matching techniques.},
}