@COMMENT{{This file has been generated by bib2bib 1.79}}
@COMMENT{{Command line: 'c:\Documents and Settings\lavm\My Documents\Luc\bib\html\bib2bib.exe' -ob pasoa.bib -oc pasoa.keys -c 'pasoa = "yes"' ../lm.bib ../team.bib}}
@COMMENT{{LNCS abbreviates the series name below; entries reference it unquoted, as in SERIES = LNCS}}
@STRING{LNCS = {Lecture Notes in Computer Science}}
@INPROCEEDINGS{Groth:AHM04,
  AUTHOR = {Paul Groth and Michael Luck and Luc Moreau},
  TITLE = {Formalising a protocol for recording provenance in Grids},
  BOOKTITLE = {Proceedings of the UK OST e-Science Third All Hands Meeting 2004 (AHM'04)},
  PAGECOUNT = {8},
  ADDRESS = {Nottingham, UK},
  MONTH = SEP,
  YEAR = {2004},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/ahm04-groth.pdf},
  EXPORT = {yes},
  PASOA = {yes},
  PIND = {EZ~03~03~04},
  ABSTRACT = {Both the scientific and business communities are beginning to rely
on Grids as problem-solving mechanisms. These communities also have requirements
in terms of provenance. Provenance is the documentation of process and the
necessity for it is apparent in fields ranging from medicine to aerospace. To
support provenance capture in Grids, we have developed an
implementation-independent protocol for the recording of provenance. We
describe the protocol in the context of a service-oriented architecture and
formalise the entities involved using an abstract state machine or a
three-dimensional state transition diagram. Using these techniques we sketch a
liveness property for the system.}
}
@INPROCEEDINGS{Groth:OPODIS04,
  AUTHOR = {Paul Groth and Michael Luck and Luc Moreau},
  TITLE = {A protocol for recording provenance in service-oriented Grids},
  BOOKTITLE = {Proceedings of the 8th International Conference on Principles of Distributed Systems (OPODIS'04)},
  SERIES = LNCS,
  VOLUME = {3544},
  PAGES = {124--139},
  PUBLISHER = {Springer-Verlag},
  ADDRESS = {Grenoble, France},
  MONTH = DEC,
  YEAR = {2004},
  ISBN = {3-540-27324-7},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/opodis04.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  PIND = {EZ~03~03~04},
  ABSTRACT = {Both the scientific and business communities, which are
beginning to rely on Grids as problem-solving mechanisms, have requirements in
terms of provenance. The provenance of some data is the documentation of
process that led to the data; its necessity is apparent in fields ranging from
medicine to aerospace. To support provenance capture in Grids, we have
developed an implementation-independent protocol for the recording of
provenance. We describe the protocol in the context of a service-oriented
architecture and formalise the entities involved using an abstract state
machine or a three-dimensional state transition diagram. Using these techniques
we sketch a liveness property for the system.}
}
@ARTICLE{Miles:JOGC06,
  AUTHOR = {Simon Miles and Paul Groth and Miguel Branco and Luc Moreau},
  TITLE = {The requirements of recording and using provenance in {e-Science} experiments},
  JOURNAL = {Journal of Grid Computing},
  VOLUME = {5},
  NUMBER = {1},
  PAGES = {1--25},
  YEAR = {2007},
  DOI = {10.1007/s10723-006-9055-3},
  URL = {http://eprints.ecs.soton.ac.uk/10269/},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {In e-Science experiments, it is vital to record the experimental process for later use such as in interpreting results, verifying that the correct process took place or tracing where data came from. The documentation of a process that led to some data is called the provenance of that data, and a provenance architecture is the software architecture for a system that will provide the necessary functionality to record, store and use provenance data. However, there has been little principled analysis of what is actually required of a provenance architecture, so it is impossible to determine the functionality they would ideally support. In this paper, we present use cases for a provenance architecture from current experiments in biology, chemistry, physics and computer science, and analyse the use cases to determine the technical requirements of a generic, application-independent architecture. We propose an architecture that meets these requirements and evaluate a preliminary implementation by attempting to realise one of the use cases.}
}
@INPROCEEDINGS{Moreau:HPDC05,
  AUTHOR = {Paul Groth and Simon Miles and Weijian Fang and Sylvia C. Wong and
    Klaus-Peter Zauner and Luc Moreau},
  TITLE = {Recording and Using Provenance in a Protein Compressibility Experiment},
  BOOKTITLE = {Proceedings of the 14th IEEE International Symposium on High Performance Distributed Computing (HPDC'05)},
  PAGES = {201--208},
  MONTH = JUL,
  YEAR = {2005},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/hpdc05.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {Very large scale computations are now becoming routinely
used as a methodology to undertake scientific research.
In this context, `provenance systems' are regarded
as the equivalent of the scientist's logbook for in silico experimentation:
provenance captures the documentation of
the process that led to some result. Using a protein compressibility
analysis application, we derive a set of generic
use cases for a provenance system. In order to support
these, we address the following fundamental questions:
what is provenance? how to record it? what is the performance
impact for grid execution? what is the performance
of reasoning? In doing so, we define a technology-independent
notion of provenance that captures interactions
between components, internal component information and
grouping of interactions, so as to allow us to analyse and
reason about the execution of scientific processes. In order
to support persistent provenance in heterogeneous applications,
we introduce a separate provenance store, in
which provenance documentation can be stored, archived
and queried independently of the technology used to run the
application. Through a series of practical tests, we evaluate
the performance impact of such a provenance system. In
summary, we demonstrate that provenance recording overhead
of our prototype system remains under 10\% of execution
time, and we show that the recorded information successfully
supports our use cases in a performant manner.}
}
@TECHREPORT{Moreau:PROV05,
  AUTHOR = {Luc Moreau and Liming Chen and Paul Groth and John Ibbotson and
    Michael Luck and Simon Miles and Omer Rana and Victor Tan and
    Steven Willmott and Fenglian Xu},
  TITLE = {Logical architecture strawman for provenance systems},
  INSTITUTION = {University of Southampton},
  YEAR = {2005},
  URL = {http://eprints.ecs.soton.ac.uk/10796/},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {The purpose of this document is to propose a logical architecture for a provenance
system. The logical architecture is specified independently of specific technologies.
Specifically, we introduce our definition of provenance in the context
of service-oriented architectures, and we identify the different roles that exist in
a provenance system.}
}
@INPROCEEDINGS{Groth:AHM05,
  AUTHOR = {Paul Groth and Simon Miles and Luc Moreau},
  TITLE = {{PReServ}: Provenance Recording for Services},
  BOOKTITLE = {Proceedings of the UK OST e-Science Fourth All Hands Meeting 2005 (AHM'05)},
  ADDRESS = {Nottingham, UK},
  MONTH = SEP,
  YEAR = {2005},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/Groth-AHM05.pdf},
  EXPORT = {yes},
  PASOA = {yes},
  PIND = {EZ~03~03~04},
  ABSTRACT = {
The importance of understanding the process by which a result was generated in
an experiment
is fundamental to science. Without such information, other scientists cannot
replicate,
validate, or duplicate an experiment. We define provenance as the process that
led to a result.
With large scale in-silico experiments, it becomes increasingly difficult for
scientists to record
process documentation that can be used to retrieve the provenance of a
result. Provenance
Recording for Services (PReServ) is a software package that allows developers
to integrate
process documentation recording into their applications. PReServ has been used
by several
applications and its performance has been benchmarked.}
}
@INPROCEEDINGS{Wong:AHM05,
  AUTHOR = {Wong, Sylvia C. and Miles, Simon and Fang, Weijian and Groth, Paul and Moreau, Luc},
  TITLE = {{Validation of E-Science Experiments using a Provenance-based Approach}},
  BOOKTITLE = {Proceedings of the Fourth All Hands Meeting (AHM'05)},
  ADDRESS = {Nottingham},
  MONTH = SEP,
  YEAR = {2005},
  URL = {http://eprints.ecs.soton.ac.uk/11063/},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  MYGRID = {yes},
  GRIMOIRES = {yes},
  SD = {yes},
  PIND = {EZ~05~05~04},
  ABSTRACT = {E-science experiments typically involve many distributed services maintained by different organisations.
As part of the scientific process, it is important for scientists to be able to verify the
correctness of their own experiments, or to review the correctness of their peers' work. There is no
existing framework for validating such experiments. Users therefore have to rely on error checking
performed by the services, or adopt other ad hoc methods. This paper introduces a platform independent
framework for validating workflow executions. The validation relies on reasoning over the
documented provenance of experiment results and semantic descriptions of services advertised in a
registry. This validation process ensures experiments are performed correctly, and thus results generated
are meaningful. The framework is tested in a bioinformatics application that performs protein
compressibility analysis.}
}
@INPROCEEDINGS{Wong:ISWC05,
  AUTHOR = {Wong, Sylvia C. and Miles, Simon and Fang, Weijian and Groth, Paul and Moreau, Luc},
  TITLE = {{Provenance-based Validation of E-Science Experiments}},
  BOOKTITLE = {Proceedings of the 4th International Semantic Web Conference (ISWC'05)},
  SERIES = LNCS,
  VOLUME = {3729},
  PAGES = {801--815},
  PUBLISHER = {Springer-Verlag},
  ADDRESS = {Galway, Ireland},
  MONTH = NOV,
  YEAR = {2005},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/iswc05.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  MYGRID = {yes},
  GRIMOIRES = {yes},
  SD = {yes},
  ABSTRACT = {E-Science experiments typically involve many distributed services maintained
by different organisations. After an experiment has been executed, it is useful
for a scientist to verify that the execution was performed correctly or is compatible
with some existing experimental criteria or standards. Scientists may also
want to review and verify experiments performed by their colleagues. There are
no existing frameworks for validating such experiments in today's e-Science systems.
Users therefore have to rely on error checking performed by the services, or
adopt other ad hoc methods. This paper introduces a platform-independent framework
for validating workflow executions. The validation relies on reasoning over
the documented provenance of experiment results and semantic descriptions of
services advertised in a registry. This validation process ensures experiments are
performed correctly, and thus results generated are meaningful. The framework is
tested in a bioinformatics application that performs protein compressibility analysis.}
}
@PROCEEDINGS{Moreau-Foster:IPAW06,
  EDITOR = {Luc Moreau and Ian Foster},
  TITLE = {{Provenance and Annotation of Data --- International Provenance and Annotation Workshop, IPAW 2006}},
  SERIES = LNCS,
  VOLUME = {4145},
  PUBLISHER = {Springer-Verlag},
  MONTH = MAY,
  YEAR = {2006},
  ISBN = {3-540-46302-X},
  URL = {http://www.springer.com/uk/home/generic/search/results?SGWID=3-40109-22-173681711-0},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  SOCA = {yes},
  ABSTRACT = {The International Provenance and Annotation Workshop (IPAW 2006)
was a follow-up to workshops in Chicago in October 2002 and in
Edinburgh in December 2003. It brought together computer
scientists and domain scientists with a common interest in issues
of data provenance, process documentation, data derivation, and
data annotation. IPAW 2006 was held on May 3-5, 2006 at the
University of Chicago's Gleacher Center in downtown Chicago and was
attended by roughly 45 participants.}
}
@INPROCEEDINGS{Branco:IPAW06,
  AUTHOR = {Miguel Branco and Luc Moreau},
  TITLE = {{Enabling provenance on large scale e-Science applications}},
  BOOKTITLE = {Proceedings of the International Provenance and Annotation Workshop (IPAW'06)},
  SERIES = LNCS,
  VOLUME = {4145},
  PAGES = {55--63},
  PUBLISHER = {Springer-Verlag},
  ADDRESS = {Chicago, Illinois},
  YEAR = {2006},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {Large-scale e-Science experiments present unprecedented data handling requirements with their multi-petabyte data storages. Complex software applications, such as the ATLAS High Energy Physics experiment at CERN, run throughout Grid computing sites around the world in a distributed environment, with scientists performing concurrent analysis on data and producing new data products shared among the collaboration. In this paper, we introduce a multi-phase infrastructure to achieve data provenance for an e-Science experiment. We propose an infrastructure to integrate provenance onto an existing legacy application with strong emphasis on scalability and explore the relationship between provenance and metadata introducing a model where data provenance is made available as metadata through a separate reasoning phase.}
}
@INPROCEEDINGS{Tan:IPAW06,
  AUTHOR = {Victor Tan and Paul Groth and Simon Miles and Sheng Jiang and
    Steve Munroe and Sofia Tsasakou and Luc Moreau},
  TITLE = {{Security Issues in a SOA-based Provenance System}},
  BOOKTITLE = {Proceedings of the International Provenance and Annotation Workshop (IPAW'06)},
  SERIES = LNCS,
  VOLUME = {4145},
  PAGES = {203--211},
  PUBLISHER = {Springer-Verlag},
  ADDRESS = {Chicago, Illinois},
  YEAR = {2006},
  URL = {http://eprints.ecs.soton.ac.uk/12569/},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  ABSTRACT = {Recent work has begun exploring the characterization and
utilization of provenance in systems based on the Service Oriented
Architecture (such as Web Services and Grid based
environments). One of the salient issues related to provenance use
within any given system is its security. Provenance presents some
unique security requirements of its own, which are additionally
dependent on the architectural and environmental context that a
provenance system operates in. We discuss the security
considerations pertaining to a Service Oriented Architecture based
provenance system. Concurrently, we outline possible approaches to
address them.}
}
@ARTICLE{Moreau:CACM07,
  AUTHOR = {Luc Moreau and Paul Groth and Simon Miles and Javier Vazquez and
    John Ibbotson and Sheng Jiang and Steve Munroe and Omer Rana and
    Andreas Schreiber and Victor Tan and Laszlo Varga},
  TITLE = {{The Provenance of Electronic Data}},
  JOURNAL = {Communications of the ACM},
  YEAR = {2007},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/cacm06.pdf},
  PASOA = {yes},
  EUPUB = {yes},
  ABSTRACT = {In the study of fine art, provenance refers to the documented history
of some art object. Given that documented history, the object attains an
authority that allows scholars to appreciate its importance with respect to
other works, whereas, in the absence of such history, the
object may be treated with some skepticism. Our IT landscape is evolving as
illustrated by applications that are open, composed dynamically, and that
discover results and services on the fly. Against this challenging background,
it is crucial for users to be able to have confidence in the results produced
by such applications. If the provenance of data produced by computer
systems could be determined as it can for some works of art, then users,
in their daily applications, would be able to interpret and judge the quality
of data better. We introduce a provenance lifecycle and
advocate an open approach based on two key principles to support a notion of
provenance in computer systems: documentation of execution and user-tailored
provenance queries.}
}
@INPROCEEDINGS{Chen:AHM05,
  AUTHOR = {Liming Chen and Victor Tan and Fenglian Xu and Alexis Biller and
    Paul Groth and Simon Miles and John Ibbotson and Michael Luck and
    Luc Moreau},
  TITLE = {{A Proof of Concept: Provenance in a Service Oriented Architecture}},
  BOOKTITLE = {Proceedings of the Fourth All Hands Meeting (AHM)},
  MONTH = SEP,
  YEAR = {2005},
  URL = {http://www.allhands.org.uk/2005/proceedings/papers/503.pdf},
  EXPORT = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  DISSEMINATION = {public},
  ABSTRACT = {Provenance has been identified as an emerging and important concept within the Grid community
for a variety of purposes, such as verifying or tracing results. We seek to provide a concrete
conception of provenance and its possible utilisation through the process of designing and
implementing a system prototype with some specific provenance requirements. This prototype,
which is based on an idealised recipe for baking a cake, is developed within the context of a
service oriented Grid computing environment and implemented using standard Web Services
technologies. The issues surrounding the design of possible provenance system are also explored.}
}
@TECHREPORT{OGSA-Data-Scenarios:GGF,
  AUTHOR = {Stephen Davey and Ali Anjomshoaa and Mario Antonioletti and
    Malcolm Atkinson and Dave Berry and Ann Chervenak and Adrian Jackson and
    Chris Jordan and Peter Kunszt and Allen Luniewski and Luc Moreau},
  TITLE = {{OGSA Data Scenarios v0.13}},
  INSTITUTION = {Global Grid Forum},
  YEAR = {2006},
  URL = {https://forge.gridforum.org/sf/go/doc13605?nav=1},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  OPTMONTH = JUN,
  ABSTRACT = {This document provides example scenarios of a generic nature to
accompany the OGSA Data Architecture document [OGSA Data
Arch]. It should be noted that this is not a use case
document generating requirements of the OGSA Data
Architecture. Instead this document comes from the opposite
direction, providing illustrations of how the components and
interfaces described in the OGSA Data Architecture document
can be put together in a selection of typical data
scenarios. {\em This document contains some provenance
related scenarios in Section 9.}}
}
@INPROCEEDINGS{Munroe:SEM06,
  AUTHOR = {Munroe, Steve and Miles, Simon and Moreau, Luc and
    V{\'a}zquez-Salceda, Javier},
  TITLE = {{Pr{IM}e: A Software Engineering Methodology for Developing Provenance-Aware Applications}},
  BOOKTITLE = {ACM Digital Proceedings of the Software Engineering and Middleware Workshop (SEM'06)},
  YEAR = {2006},
  URL = {http://eprints.ecs.soton.ac.uk/13062/},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  ABSTRACT = {Provenance is a concept often used in the Art world to refer to the
documented history of an artifact, providing information about the
artifact's lineage and authenticity. Provenance-aware applications
similarly allow their users to have confidence about the data they
produce, and can enable users to make judgements relating to notions
of trust, accountability, validation, replication and compliance of
their data. PrIMe is a software engineering methodology for adapting
applications to enable them to interact with a provenance middleware
layer, thereby making them provenance-aware. Such applications allow
users to answer questions about provenance use cases, which are
descriptions of scenarios in which a user interacts with a system by
performing particular functions on that system. In order to
illustrate how PrIMe can make applications provenance-aware, an
Organ Transplant Management example application is used.}
}
@ARTICLE{Kifor:IS06,
  AUTHOR = {Tam{\'a}s Kifor and L{\'a}szl{\'o} Z. Varga and
    Javier V{\'a}zquez-Salceda and Sergio {\'A}lvarez and
    Steven Willmott and Simon Miles and Luc Moreau},
  TITLE = {{Provenance in Agent-mediated Healthcare Systems}},
  JOURNAL = {IEEE Intelligent Systems},
  MONTH = NOV # "/" # DEC,
  YEAR = {2006},
  URL = {http://www.gridprovenance.org/publications/ProvenanceInAgentMediatedHealthcareSystems-V19.doc},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  ABSTRACT = {Agent-oriented cooperation techniques and standardized electronic
healthcare record exchange protocols can be used to combine
information regarding different facets of a therapy received
by a patient from different healthcare providers at different
locations. Provenance is an innovative approach to trace
events in complex distributed processes, dependencies between
such events, and associated decisions by human actors. We
focus on three aspects of provenance in agent-mediated
healthcare systems: first, we define the provenance concept
and show how it can be applied to agent-mediated healthcare
applications; second, we investigate and provide a method for
independent and autonomous healthcare agents to document the
processes they are involved in without directly interacting
with each other; and third, we show that this method solves
the privacy issues of provenance in agent-mediated healthcare
systems.}
}
@ARTICLE{Bose-Foster-Moreau:IPAW06,
  AUTHOR = {Raj Bose and Ian Foster and Luc Moreau},
  TITLE = {{Report on the International Provenance and Annotation Workshop (IPAW'06)}},
  JOURNAL = {SIGMOD Record},
  VOLUME = {35},
  NUMBER = {3},
  PAGES = {51--53},
  MONTH = SEP,
  YEAR = {2006},
  URL = {http://www.sigmod.org/sigmod/record/issues/0609/sigmod-record.september2006.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  SOCA = {yes},
  ABSTRACT = {The International Provenance and Annotation Workshop (IPAW'06) was
held May 3-5, 2006 at the University of Chicago's Gleacher Center
in downtown Chicago; it was co-chaired by Luc Moreau (University of
Southampton) and Ian Foster (University of Chicago and Argonne
National Laboratory) and included roughly 45 participants,
representing about 25 organizations or projects.}
}
@ARTICLE{Miles:WEBSEM07,
  AUTHOR = {Simon Miles and Sylvia C. Wong and Weijian Fang and Paul Groth and
    Klaus-Peter Zauner and Luc Moreau},
  TITLE = {Provenance-Based Validation of {e-Science} Experiments},
  JOURNAL = {Web Semantics: Science, Services and Agents on the World Wide Web},
  VOLUME = {5},
  NUMBER = {1},
  PAGES = {28--38},
  YEAR = {2007},
  ISSN = {1570-8268},
  DOI = {10.1016/j.websem.2006.11.003},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/WEBSEM07.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  MYGRID = {yes},
  GRIMOIRES = {yes},
  SD = {yes},
  ABSTRACT = {E-science experiments typically involve many distributed services maintained by different organisations. After an experiment has been executed,
it is useful for a scientist to verify that the execution was performed correctly or is compatible with some existing experimental criteria or standards,
not necessarily anticipated prior to execution. Scientists may also want to review and verify experiments performed by their colleagues. There are
no existing frameworks for validating such experiments in today's e-science systems. Users therefore have to rely on error checking performed
by the services, or adopt other ad hoc methods. This paper introduces a platform-independent framework for validating workflow executions.
The validation relies on reasoning over the documented provenance of experiment results and semantic descriptions of services advertised in a
registry. This validation process ensures experiments are performed correctly, and thus results generated are meaningful. The framework is tested
in a bioinformatics application that performs protein compressibility analysis.}
}
@ARTICLE{Editorial:Challenge06,
  AUTHOR = {Luc Moreau and
    Bertram Lud{\"a}scher and
    Ilkay Altintas and
    Roger S. Barga and
    Shawn Bowers and
    Steven Callahan and
    Chin, Jr., George and
    Ben Clifford and
    Shirley Cohen and
    Sarah Cohen-Boulakia and
    Susan Davidson and
    Ewa Deelman and
    Luciano Digiampietri and
    Ian Foster and
    Juliana Freire and
    James Frew and
    Joe Futrelle and
    Tara Gibson and
    Yolanda Gil and
    Carole Goble and
    Jennifer Golbeck and
    Paul Groth and
    David A. Holland and
    Sheng Jiang and
    Jihie Kim and
    David Koop and
    Ales Krenek and
    Timothy McPhillips and
    Gaurang Mehta and
    Simon Miles and
    Dominic Metzger and
    Steve Munroe and
    Jim Myers and
    Beth Plale and
    Norbert Podhorszki and
    Varun Ratnakar and
    Emanuele Santos and
    Carlos Scheidegger and
    Karen Schuchardt and
    Margo Seltzer and
    Yogesh L. Simmhan and
    Claudio Silva and
    Peter Slaughter and
    Eric Stephan and
    Robert Stevens and
    Daniele Turi and
    Huy Vo and
    Mike Wilde and
    Jun Zhao and
    Yong Zhao},
  TITLE = {{The First Provenance Challenge}},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2007},
  DOI = {10.1002/cpe.1233},
  EXPORT = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  SOCA = {yes},
  OPTVOLUME = {in this issue},
  ABSTRACT = {The first Provenance Challenge was set up in order to provide a forum for the community to help understand the capabilities of different provenance systems
and the expressiveness of their provenance representations.
To this end, a Functional
Magnetic Resonance Imaging workflow was defined, which participants
had to either simulate or run in order to produce some provenance
representation, from which a set of identified queries had to be
implemented and executed. Sixteen teams responded to the
challenge, and submitted their inputs. In this paper, we present
the challenge workflow and queries, and summarise the participants'
contributions.}
}
@ARTICLE{OPA:Challenge06,
  AUTHOR = {Simon Miles and Paul Groth and Steve Munroe and Sheng Jiang and
    Thibaut Assandri and Luc Moreau},
  TITLE = {{Extracting Causal Graphs from an Open Provenance Data Model}},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2007},
  EXPORT = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  OPTVOLUME = {in this issue},
  ABSTRACT = {The open provenance architecture (OPA) approach to the challenge
was distinct in several regards. In particular, it is based on an
open, well-defined data model and architecture, allowing different
components of the challenge workflow to independently record
documentation, and for the workflow to be executed in any
environment. Another noticeable feature is that we distinguish
between the data recorded about what has occurred, \emph{process
documentation}, and the \emph{provenance} of a data item, which is
all that caused the data item to be as it is and is obtained as the
result of a query over process documentation. This distinction
allows us to tailor the system to separately best address the
requirements of recording and querying documentation. Other
notable features include the explicit recording of causal
relationships between both events and data items, an
interaction-based world model, intensional definition of data items
in queries rather than relying on explicit naming mechanisms, and
\emph{styling} of documentation to support non-functional
application requirements such as reducing storage costs or ensuring
privacy of data. In this paper we describe how each of these
features aid us in answering the challenge provenance queries.}
}
@INPROCEEDINGS{Miles:AAMAS07,
  AUTHOR = {Simon Miles and Steve Munroe and Michael Luck and Luc Moreau},
  TITLE = {Modelling the Provenance of Data in Autonomous Systems},
  BOOKTITLE = {Proceedings of the Sixth International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS'07)},
  YEAR = {2007},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/aamas07.pdf},
  EXPORT = {yes},
  PASOA = {yes},
  ABSTRACT = {Determining the provenance of data, i.e. the process that led to
that
data, is vital in many disciplines. For example, in science, the process
that produced a given result must be demonstrably rigorous for
the result to be deemed reliable. A provenance system supports applications
in recording adequate documentation about process executions
to answer queries regarding provenance, and provides functionality
to perform those queries. Several provenance systems are
being developed, but all focus on systems in which the components
are reactive, for example Web Services that act on the basis
of a request, job submission system, etc. This limitation means that
questions regarding the motives of autonomous actors, or agents, in
such systems remain unanswerable in the general case. Such questions
include: who was ultimately responsible for a given effect,
what was their reason for initiating the process and does the effect
of a process match what was intended to occur by those initiating
the process? In this paper, we address this limitation by integrating
two solutions: a generic, re-usable framework for representing
the provenance of data in service-oriented architectures and a
model for describing the goal-oriented delegation and engagement
of agents in multi-agent systems. Using these solutions, we present
algorithms to answer common questions regarding responsibility
and success of a process and evaluate the approach with a simulated
healthcare example.}
}
@INPROCEEDINGS{Miles:Methodo07,
  AUTHOR = {Simon Miles and Paul Groth and Steve Munroe and Michael Luck and
    Luc Moreau},
  TITLE = {{AgentPrIMe: Adapting MAS Designs to Build Confidence}},
  BOOKTITLE = {Agent-Oriented Software Engineering (AOSE'07)},
  YEAR = {2007},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/aose07.pdf},
  EXPORT = {yes},
  PASOA = {yes},
  ABSTRACT = {The products of systems cannot always be judged at face value: the
process by which they were obtained is also important. For
instance, the rigour of a scientific experiment, the ethics with
which an item was manufactured and the use of services with
particular licensing all affect how the results of those
processes are valued. However, in systems of autonomous agents, and
particularly those with multiple independent contributory
organisations, the ability of agents to choose how their goals or
responsibilities are achieved can hide such process qualities from
users. The issue of ensuring that users are able to check these
process qualities is a software engineering one: the developer must
decide to ensure that adequate data is recorded regarding processes
and safeguards implemented to ensure accuracy. In this paper, we
describe AgentPrIMe, an adjunct to existing agent-oriented
methodologies that allows system designs to be adapted to give
users confidence in the results they produce. It does this by
adaptations to the design for documentation, corroboration,
independent storage and accountability.}
}
@incollection{Moreau:HPC07,
  author     = {Paul Groth and Steve Munroe and Simon Miles and Luc Moreau},
  title      = {{Applying the Provenance Data Model to a Bioinformatics Case}},
  booktitle  = {HPC and Grids in Action},
  publisher  = {IOS Press},
  year       = {2007},
  pasoa      = {yes},
  alteditor  = {},
  optkey     = {},
  optvolume  = {},
  optnumber  = {},
  optseries  = {},
  opttype    = {},
  optaddress = {},
  optedition = {},
  optmonth   = {},
  optpages   = {},
  optnote    = {},
  optannote  = {}
}
@unpublished{Groth:TOIT,
  author    = {Paul Groth and Luc Moreau},
  title     = {{A Shared Model for Documentation of Processes Enabling the Determination of Provenance}},
  note      = {Submitted for Publication},
  year      = {2007},
  pasoa     = {yes},
  optkey    = {},
  optvolume = {},
  optnumber = {},
  optpages  = {},
  optmonth  = {},
  optannote = {}
}
@unpublished{Groth:TPDS07,
  author    = {Paul Groth and Luc Moreau},
  title     = {Recording Process Documentation for Provenance},
  note      = {Submitted for Publication},
  year      = {2007},
  pasoa     = {yes},
  optkey    = {},
  optvolume = {},
  optnumber = {},
  optpages  = {},
  optmonth  = {},
  optannote = {}
}
@inproceedings{Miles:eScience07,
  author          = {Simon Miles and Ewa Deelman and Paul Groth and Karan Vahi and Gaurang Mehta and Luc Moreau},
  title           = {Connecting Scientific Data to Scientific Experiments with Provenance},
  booktitle       = {Proceedings of the Third IEEE International Conference on e-Science and Grid Computing (e-Science'07)},
  year            = {2007},
  soca            = {yes},
  pasoa           = {yes},
  optcrossref     = {},
  optkey          = {},
  optpages        = {},
  opteditor       = {},
  optvolume       = {},
  optnumber       = {},
  optseries       = {},
  optaddress      = {},
  optmonth        = {},
  optorganization = {},
  optpublisher    = {},
  optnote         = {},
  optannote       = {},
  abstract        = {As scientific workflows, and the data they operate on, grow in size and complexity, the task of defining how those workflows should execute (which resources they should use, where those resources should be in preparation for processing etc.) becomes proportionally more difficult. While `workflow compilers', such as Pegasus, aid greatly in reducing this burden, a further problem arises: as specifying the details of execution is now automatic, a workflow's results are harder to interpret, as they are in part due to the specifics of execution. By automating the steps between the original experiment design and its results, we lose the connection between them, making results harder to interpret. To reconnect the scientific data with the original experiment, we argue that scientists should have access to the full provenance of their data, including not only parameters, input data and intermediary results, but also the abstract experiment, refined into a concrete execution by the `workflow compiler'. In this paper, we describe our preliminary work on adapting Pegasus to capture the process of workflow refinement in the PASOA provenance system.}
}
@unpublished{miles07determining,
  author   = {Simon Miles and Luc Moreau},
  title    = {Determining Provenance Through Scoped Queries Over Causal Graphs},
  note     = {To Be Submitted},
  year     = {2007},
  pasoa    = {yes},
  abstract = {The provenance of entities, whether electronic data or
physical artefacts,
is crucial information in practically all domains, including science,
business and art. The increased use of software in automating our
activities provides the opportunity to add greatly to the amount
we can know about an entity's history and the process by which it
came to be as it is. However, it also presents difficulties: querying
for the provenance of an entity could potentially return detailed
information stretching back far into the past, most of it irrelevant
to the querier. In this paper, we define the concept of a provenance
query and describe techniques that allow us to perform scoped provenance
queries, by which a querier can declare in advance exactly what in
the item's history is relevant to them. Using these techniques, a
user can discover the provenance of data in the applications they
use.}
}
@techreport{opm:2007,
  author      = {Luc Moreau and Juliana Freire and Jim Myers and Joe Futrelle and Patrick Paulson},
  title       = {The Open Provenance Model},
  institution = {University of Southampton},
  year        = {2007},
  pasoa       = {yes},
  optkey      = {},
  opttype     = {},
  optnumber   = {},
  optaddress  = {},
  optmonth    = {},
  optnote     = {},
  optannote   = {}
}
@inproceedings{Townend2005,
  author    = {Paul Townend and Paul Groth and Jie Xu},
  title     = {A Provenance-Aware Weighted Fault Tolerance Scheme for Service-Based Applications},
  booktitle = {Proc. of the 8th IEEE International Symposium on Object-oriented Real-time distributed Computing (ISORC 2005)},
  year      = {2005},
  month     = may,
  pasoa     = {yes},
  export    = {yes},
  owner     = {pgroth},
  abstract  = {Service-orientation has been proposed as a
way of facilitating the development and integration of increasingly
complex and heterogeneous system components. However, there are many
new challenges to the dependability community in this new paradigm,
such as how individual channels within fault-tolerant systems may
invoke common services as part of their workflow, thus increasing
the potential for common-mode failure. We propose a scheme that -
for the first time - links the technique of provenance with that of
multi-version fault tolerance. We implement a large test system and perform
experiments with a single-version system, a traditional MVD system, and
a provenance-aware MVD system, and compare their results. We show
that for this experiment, our provenance-aware scheme results in a
much more dependable system than either of the other systems tested,
whilst imposing a negligible timing overhead. }
}
@inproceedings{Miles:BIOMED05,
  author    = {Simon Miles},
  title     = {Agent-Oriented Data Curation in Bioinformatics},
  booktitle = {Proceedings of Workshop on Multi-Agent Systems in Medicine, Computational Biology, and Bioinformatics (MAS*BioMed'05)},
  year      = {2005},
  month     = jul,
  pages     = {157--169},
  url       = {http://eprints.ecs.soton.ac.uk/10853/},
  owner     = {sm},
  pasoa     = {yes},
  export    = {yes},
  abstract  = {Bioinformatics is a fast-growing field in which biological data
is analysed and shared using software tools. However, due to the
field's success, the size and complexity of the data being produced is
increasing fast. It also means that new, relatively inexperienced
researchers are constantly being recruited. Together, these
characteristics make it hard for organisations to ensure that work is
being undertaken on the best available data and with the best available
tools. Several strands of research aim to support the bioinformatics
community in managing the complexity of their experiments. In our own
recent work, we have focused on recording the provenance of
experimental results [8]. The provenance of a piece of data is the
process that led to that data, and provenance data is the documentation
of that process. We have determined a number of provenance-related use
cases in bioinformatics [11] through interviews with scientists, such
as the comparison of two experiment runs to determine why results were
different, and justifying that the experiment was performed in a valid
way to others. We have provided software to record and maintain
provenance data in provenance stores.}
}
@article{Miles06,
  author   = {Simon Miles},
  title    = {Agent-Oriented Data Curation in Bioinformatics},
  journal  = {International Transactions on Systems Science and Applications},
  year     = {2006},
  volume   = 1,
  number   = 1,
  pages    = {43--50},
  url      = {http://eprints.ecs.soton.ac.uk/10853/},
  pasoa    = {yes},
  export   = {yes},
  abstract = {Bioinformatics is a fast-growing field in which biological data
is analysed and shared using software tools. However, due to the
field's success, the size and complexity of the data being produced is
increasing fast. It also means that new, relatively inexperienced
researchers are constantly being recruited. Together, these
characteristics make it hard for organisations to ensure that work is
being undertaken on the best available data and with the best available
tools. Several strands of research aim to support the bioinformatics
community in managing the complexity of their experiments. In our own
recent work, we have focused on recording the provenance of
experimental results [8]. The provenance of a piece of data is the
process that led to that data, and provenance data is the documentation
of that process. We have determined a number of provenance-related use
cases in bioinformatics [11] through interviews with scientists, such
as the comparison of two experiment runs to determine why results were
different, and justifying that the experiment was performed in a valid
way to others. We have provided software to record and maintain
provenance data in provenance stores.}
}
@inproceedings{Townend2005a,
  author    = {Paul Townend and Paul Groth and Nik Looker and Jie Xu},
  title     = {{FT-Grid: A Fault-Tolerance System for e-Science}},
  booktitle = {Proceedings of the UK OST e-Science Fourth All Hands Meeting (AHM05)},
  year      = {2005},
  month     = sep,
  url       = {./mypapers/ahm-ftgrid-3.pdf},
  pasoa     = {yes},
  publish   = {yes},
  owner     = {pgroth},
  abstract  = {The size and complexity of many e-Science applications suggests
that they may be very prone to errors and
failures; the cost of recovering from failures may also be high. The
FT-Grid system, developed as part of the e-Demand project at the
University of Leeds [1], introduces a replication-based fault tolerance
scheme that allows faults occurring in service-based systems to be
tolerated, thus increasing the dependability of such systems. This paper
details the progress that has been made in the development of FT-Grid,
including both a GUI client and also an FT-Grid web service interface.
We show empirical evidence of the dependability benefits offered
by FT-Grid, by performing a dependability analysis on the results
of fault injection testing performed with the WS-FIT tool at the
University of Durham. We then illustrate a potential problem with voting
based fault tolerance approaches in the service-oriented paradigm -
namely, that individual channels within fault-tolerant systems may
invoke common services as part of their workflow, thus increasing
the potential for common-mode failure. We propose a solution to this
issue by using the technique of provenance to provide FT-Grid with
topological awareness. We implement a large test system, and - with
the use of the PreServ provenance system developed as part of the
PASOA e-Science project at the University of Southampton - perform
a large number of experiments which show that a provenance-aware
FT-Grid results in a much more dependable system than any of the other
configurations tested, whilst imposing a negligible timing overhead.}
}
@inproceedings{Groth:IPAW06,
  author    = {Paul Groth and Simon Miles and Steve Munroe},
  title     = {{Principles of High Quality Documentation for Provenance: A Philosophical Discussion}},
  booktitle = {International Provenance and Annotation Workshop (IPAW'06)},
  editor    = {Luc Moreau and Ian Foster},
  series    = LNCS,
  volume    = {4145},
  pages     = {278--286},
  publisher = {Springer},
  year      = {2006},
  month     = may,
  url       = {http://eprints.ecs.soton.ac.uk/12568/},
  export    = {yes},
  pasoa     = {yes},
  eupub     = {yes},
  owner     = {pgroth},
  abstract  = {Computer technology enables the creation of
detailed documentation about the processes that create or affect
entities (data, objects, etc.). Such documentation of the past can be
used to answer various kinds of questions regarding the processes
that led to the creation or modification of a particular entity. The
answer to such questions are known as an entity's provenance. In this
paper, we derive a number of principles for documenting the past,
grounded in work from philosophy and history, which allow for provenance
questions to be answered within a computational context. These principles
lead us to argue that an interaction-based model is particularly
suited for representing high quality documentation of the past.}
}
@inproceedings{Miles:IPAW06,
  author    = {Simon Miles},
  title     = {{Electronically Querying for the Provenance of Entities}},
  booktitle = {Proceedings of the International Provenance and Annotation Workshop 2006 (IPAW 2006)},
  editor    = {Luc Moreau and Ian Foster},
  series    = LNCS,
  volume    = {4145},
  pages     = {184--192},
  publisher = {Springer},
  year      = {2006},
  month     = may,
  url       = {http://eprints.ecs.soton.ac.uk/12567/},
  pasoa     = {yes},
  abstract  = {The provenance of entities, whether electronic data or physical artefacts,
is crucial information in practically all domains, including science,
business and art. The increased use of software in automating activities
provides the opportunity to add greatly to the amount we can know
about an entity's history and the process by which it came to be
as it is. However, it also presents difficulties: querying for the
provenance of an entity could potentially return detailed information
stretching back to the beginning of time, and most of it possibly
irrelevant to the querier. In this paper, we define the concept
of provenance query and describe techniques that allow us to perform
scoped provenance queries.}
}
@phdthesis{Groth:PhdThesis07,
  author = {Paul Groth},
  title  = {The Origin of Data: Enabling the Determination of Provenance in Multi-institutional Scientific Systems through the Documentation of Processes},
  school = {University of Southampton},
  year   = {2007},
  export = {yes},
  pasoa  = {yes}
}