@COMMENT{{This file has been generated by bib2bib 1.79}}

@COMMENT{{Command line: 'c:\Documents and Settings\lavm\My Documents\Luc\bib\html\bib2bib.exe' -ob pasoa.bib -oc pasoa.keys -c 'pasoa = "yes"' ../lm.bib ../team.bib}}

@COMMENT{{LNCS: shared series name; entries reference it unquoted as SERIES = LNCS}}
@STRING{LNCS = {Lecture Notes in Computer Science}}

@INPROCEEDINGS{Groth:AHM04,
  AUTHOR = {Paul Groth and Michael Luck and Luc Moreau},
  TITLE = {Formalising a protocol for recording provenance in {Grids}},
  BOOKTITLE = {Proceedings of the UK OST e-Science Third All Hands Meeting
                  2004 (AHM'04)},
  PAGECOUNT = {8},
  YEAR = 2004,
  MONTH = SEP,
  ADDRESS = {Nottingham, UK},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/ahm04-groth.pdf},
  PASOA = {yes},
  PIND = {EZ~03~03~04},
  EXPORT = {yes},
  ABSTRACT = {Both the scientific and business communities are beginning to rely
on Grids as problem-solving mechanisms. These communities also have requirements
in terms of provenance.  Provenance is the documentation of process and the
necessity for it is apparent in fields ranging from medicine to aerospace. To
support provenance capture in Grids, we have developed an
implementation-independent protocol for the recording of provenance. We
describe the protocol in the context of a service-oriented architecture and
formalise the entities involved using an abstract state machine or a
three-dimensional state transition diagram. Using these techniques we sketch a
liveness property for the system.}
}

@INPROCEEDINGS{Groth:OPODIS04,
  AUTHOR = {Paul Groth and Michael Luck and Luc Moreau},
  TITLE = {A protocol for recording provenance in service-oriented {Grids}},
  BOOKTITLE = {Proceedings of the 8th International Conference on Principles of Distributed Systems (OPODIS'04)},
  SERIES = LNCS,
  VOLUME = {3544},
  PAGES = {124--139},
  PUBLISHER = {Springer-Verlag},
  ADDRESS = {Grenoble, France},
  YEAR = 2004,
  MONTH = DEC,
  ISBN = {3-540-27324-7},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/opodis04.pdf},
  PIND = {EZ~03~03~04},
  EXPORT = {yes},
  PASOA = {yes},
  PROVENANCE = {yes},
  ABSTRACT = {Both the scientific and business communities, which are
beginning to rely on Grids as problem-solving mechanisms, have requirements in
terms of provenance. The provenance of some data is the documentation of
process that led to the data; its necessity is apparent in fields ranging from
medicine to aerospace. To support provenance capture in Grids, we have
developed an implementation-independent protocol for the recording of
provenance. We describe the protocol in the context of a service-oriented
architecture and formalise the entities involved using an abstract state
machine or a three-dimensional state transition diagram. Using these techniques
we sketch a liveness property for the system.}
}

@ARTICLE{Miles:JOGC06,
  AUTHOR = {Simon Miles and Paul Groth and Miguel Branco and Luc Moreau},
  TITLE = {The requirements of recording and using provenance in {e-Science}
              experiments},
  JOURNAL = {Journal of Grid Computing},
  VOLUME = {5},
  NUMBER = {1},
  PAGES = {1--25},
  YEAR = {2007},
  DOI = {10.1007/s10723-006-9055-3},
  URL = {http://eprints.ecs.soton.ac.uk/10269/},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {In e-Science experiments, it is vital to record the experimental process for later use such as in interpreting results, verifying that the correct process took place or tracing where data came from. The documentation of a process that led to some data is called the provenance of that data, and a provenance architecture is the software architecture for a system that will provide the necessary functionality to record, store and use provenance data. However, there has been little principled analysis of what is actually required of a provenance architecture, so it is impossible to determine the functionality they would ideally support. In this paper, we present use cases for a provenance architecture from current experiments in biology, chemistry, physics and computer science, and analyse the use cases to determine the technical requirements of a generic, application-independent architecture. We propose an architecture that meets these requirements and evaluate a preliminary implementation by attempting to realise one of the use cases.}
}

@INPROCEEDINGS{Moreau:HPDC05,
  AUTHOR = {Paul Groth and Simon Miles and Weijian Fang and Sylvia
                  C. Wong and Klaus-Peter Zauner and Luc Moreau},
  TITLE = {Recording and Using Provenance in a Protein Compressibility Experiment},
  BOOKTITLE = {Proceedings of the 14th IEEE International Symposium on High
                  Performance Distributed Computing (HPDC'05)},
  PAGES = {201--208},
  YEAR = {2005},
  MONTH = JUL,
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/hpdc05.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {Very large scale computations are now becoming routinely
used as a methodology to undertake scientific research.
In this context, `provenance systems' are regarded
as the equivalent of the scientist's logbook for in silico experimentation:
provenance captures the documentation of
the process that led to some result. Using a protein compressibility
analysis application, we derive a set of generic
use cases for a provenance system. In order to support
these, we address the following fundamental questions:
what is provenance? how to record it? what is the performance
impact for grid execution? what is the performance
of reasoning? In doing so, we define a technology-independent
notion of provenance that captures interactions
between components, internal component information and
grouping of interactions, so as to allow us to analyse and
reason about the execution of scientific processes. In order
to support persistent provenance in heterogeneous applications,
we introduce a separate provenance store, in
which provenance documentation can be stored, archived
and queried independently of the technology used to run the
application. Through a series of practical tests, we evaluate
the performance impact of such a provenance system. In
summary, we demonstrate that provenance recording overhead
of our prototype system remains under 10\% of execution
time, and we show that the recorded information successfully
supports our use cases in a performant manner.}
}

@TECHREPORT{Moreau:PROV05,
  AUTHOR = {Luc Moreau and Liming Chen and Paul Groth and John Ibbotson and
                  Michael Luck and Simon Miles and Omer Rana and Victor Tan and
                  Steven Willmott and Fenglian Xu},
  TITLE = {Logical architecture strawman for provenance systems},
  INSTITUTION = {University of Southampton},
  YEAR = {2005},
  URL = {http://eprints.ecs.soton.ac.uk/10796/},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {The purpose of this document is to propose a logical architecture for a provenance
system. The logical architecture is specified independently of specific technologies.
Specifically, we introduce our definition of provenance in the context
of service-oriented architectures, and we identify the different roles that exist in
a provenance system.}
}

@INPROCEEDINGS{Groth:AHM05,
  AUTHOR = {Paul Groth and Simon Miles and Luc Moreau},
  TITLE = {{PReServ}: {Provenance} {Recording} for {Services}},
  BOOKTITLE = {Proceedings of the UK OST e-Science Fourth All Hands Meeting
                  2005 (AHM'05)},
  YEAR = 2005,
  MONTH = SEP,
  ADDRESS = {Nottingham, UK},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/Groth-AHM05.pdf},
  PIND = {EZ~03~03~04},
  EXPORT = {yes},
  PASOA = {yes},
  ABSTRACT = {The importance of understanding the process by which a result was
generated in an experiment is fundamental to science. Without such information,
other scientists cannot replicate, validate, or duplicate an experiment. We
define provenance as the process that led to a result. With large scale
in-silico experiments, it becomes increasingly difficult for scientists to
record process documentation that can be used to retrieve the provenance of a
result. Provenance Recording for Services (PReServ) is a software package that
allows developers to integrate process documentation recording into their
applications. PReServ has been used by several applications and its performance
has been benchmarked.}
}

@INPROCEEDINGS{Wong:AHM05,
  AUTHOR = {Wong, Sylvia C. and Miles, Simon and Fang, Weijian and Groth, Paul and Moreau, Luc},
  TITLE = {Validation of {E-Science} Experiments using a Provenance-based Approach},
  BOOKTITLE = {Proceedings of the Fourth All Hands Meeting (AHM'05)},
  YEAR = {2005},
  MONTH = SEP,
  ADDRESS = {Nottingham, UK},
  URL = {http://eprints.ecs.soton.ac.uk/11063/},
  PIND = {EZ~05~05~04},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  MYGRID = {yes},
  GRIMOIRES = {yes},
  SD = {yes},
  ABSTRACT = {E-science experiments typically involve many distributed services maintained by different organisations.
As part of the scientific process, it is important for scientists to be able to verify the
correctness of their own experiments, or to review the correctness of their peers' work. There is no
existing framework for validating such experiments. Users therefore have to rely on error checking
performed by the services, or adopt other ad hoc methods. This paper introduces a platform independent
framework for validating workflow executions. The validation relies on reasoning over the
documented provenance of experiment results and semantic descriptions of services advertised in a
registry. This validation process ensures experiments are performed correctly, and thus results generated
are meaningful. The framework is tested in a bioinformatics application that performs protein
compressibility analysis.}
}

@INPROCEEDINGS{Wong:ISWC05,
  AUTHOR = {Wong, Sylvia C. and Miles, Simon and Fang, Weijian and Groth, Paul and Moreau, Luc},
  TITLE = {Provenance-based Validation of {E-Science} Experiments},
  BOOKTITLE = {Proceedings of the 4th International Semantic Web Conference (ISWC'05)},
  SERIES = LNCS,
  VOLUME = {3729},
  PAGES = {801--815},
  PUBLISHER = {Springer-Verlag},
  ADDRESS = {Galway, Ireland},
  YEAR = {2005},
  MONTH = NOV,
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/iswc05.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  MYGRID = {yes},
  GRIMOIRES = {yes},
  SD = {yes},
  ABSTRACT = {E-Science experiments typically involve many distributed services maintained
by different organisations. After an experiment has been executed, it is useful
for a scientist to verify that the execution was performed correctly or is compatible
with some existing experimental criteria or standards. Scientists may also
want to review and verify experiments performed by their colleagues. There are
no existing frameworks for validating such experiments in today's e-Science systems.
Users therefore have to rely on error checking performed by the services, or
adopt other ad hoc methods. This paper introduces a platform-independent framework
for validating workflow executions. The validation relies on reasoning over
the documented provenance of experiment results and semantic descriptions of
services advertised in a registry. This validation process ensures experiments are
performed correctly, and thus results generated are meaningful. The framework is
tested in a bioinformatics application that performs protein compressibility analysis.}
}

@PROCEEDINGS{Moreau-Foster:IPAW06,
  EDITOR = {Luc Moreau and Ian Foster},
  TITLE = {Provenance and Annotation of Data --- International Provenance and Annotation Workshop, {IPAW} 2006},
  SERIES = LNCS,
  VOLUME = {4145},
  PUBLISHER = {Springer-Verlag},
  YEAR = {2006},
  MONTH = MAY,
  ISBN = {3-540-46302-X},
  URL = {http://www.springer.com/uk/home/generic/search/results?SGWID=3-40109-22-173681711-0},
  EUPUB = {yes},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  SOCA = {yes},
  ABSTRACT = {The International Provenance and Annotation Workshop (IPAW 2006)
            was a follow-up to workshops in Chicago in October 2002 and in
            Edinburgh in December 2003.  It brought together computer
            scientists and domain scientists with a common interest in issues
            of data provenance, process documentation, data derivation, and
            data annotation.  IPAW 2006 was held on May 3--5, 2006 at the
            University of Chicago's Gleacher Center in downtown Chicago and was
            attended by roughly 45 participants.}
}

@INPROCEEDINGS{Branco:IPAW06,
  AUTHOR = {Miguel Branco and Luc Moreau},
  TITLE = {Enabling provenance on large scale {e-Science} applications},
  BOOKTITLE = {Proceedings of the International Provenance and Annotation
                  Workshop (IPAW'06)},
  SERIES = LNCS,
  VOLUME = {4145},
  PAGES = {55--63},
  PUBLISHER = {Springer-Verlag},
  ADDRESS = {Chicago, Illinois},
  YEAR = {2006},
  MONTH = MAY,
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {Large-scale e-Science experiments present unprecedented data handling requirements with their multi-petabyte data storages. Complex software applications, such as the ATLAS High Energy Physics experiment at CERN, run throughout Grid computing sites around the world in a distributed environment, with scientists performing concurrent analysis on data and producing new data products shared among the collaboration. In this paper, we introduce a multi-phase infrastructure to achieve data provenance for an e-Science experiment. We propose an infrastructure to integrate provenance onto an existing legacy application with strong emphasis on scalability and explore the relationship between provenance and metadata introducing a model where data provenance is made available as metadata through a separate reasoning phase.}
}

@INPROCEEDINGS{Tan:IPAW06,
  AUTHOR = {Victor Tan and Paul Groth and Simon Miles and Sheng Jiang and Steve
                  Munroe and Sofia Tsasakou and Luc Moreau},
  TITLE = {Security Issues in a {SOA-based} Provenance System},
  BOOKTITLE = {Proceedings of the International Provenance and Annotation
                  Workshop (IPAW'06)},
  SERIES = LNCS,
  VOLUME = {4145},
  PAGES = {203--211},
  PUBLISHER = {Springer-Verlag},
  ADDRESS = {Chicago, Illinois},
  YEAR = {2006},
  MONTH = MAY,
  URL = {http://eprints.ecs.soton.ac.uk/12569/},
  EUPUB = {yes},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  ABSTRACT = {Recent work has begun exploring the characterization and
            utilization of provenance in systems based on the Service Oriented
            Architecture (such as Web Services and Grid based
            environments). One of the salient issues related to provenance use
            within any given system is its security. Provenance presents some
            unique security requirements of its own, which are additionally
            dependent on the architectural and environmental context that a
            provenance system operates in. We discuss the security
            considerations pertaining to a Service Oriented Architecture based
            provenance system. Concurrently, we outline possible approaches to
            address them.}
}

@ARTICLE{Moreau:CACM07,
  AUTHOR = {Luc Moreau and Paul Groth and Simon Miles and
                  Javier V{\'a}zquez-Salceda and John Ibbotson and
                  Sheng Jiang and Steve Munroe and Omer Rana and
                  Andreas Schreiber and Victor Tan and Laszlo Varga},
  TITLE = {The Provenance of Electronic Data},
  JOURNAL = {Communications of the ACM},
  YEAR = {2007},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/cacm06.pdf},
  EUPUB = {yes},
  PASOA = {yes},
  ABSTRACT = {In the study of fine art, provenance refers to the documented history
of some art object. Given that documented history, the object attains an
authority that allows scholars to appreciate its importance with respect to
other works, whereas, in the absence of such history, the
object may be treated with some skepticism.  Our IT landscape is evolving as
illustrated by applications that are open, composed dynamically, and that
discover results and services on the fly. Against this challenging background,
it is crucial for users to be able to have confidence in the results produced
by such applications.   If the provenance of data produced by computer
systems could be determined as it can for some works of art, then users,
in their daily applications, would be able to interpret and judge the quality
of data better. We introduce a provenance lifecycle and
advocate an open approach based on two key principles to support a notion of
provenance in computer systems: documentation of execution and user-tailored
provenance queries.}
}

@INPROCEEDINGS{Chen:AHM05,
  AUTHOR = {Liming Chen and Victor Tan and Fenglian Xu and Alexis Biller and Paul Groth and Simon Miles and John Ibbotson and Michael Luck and Luc Moreau},
  TITLE = {A Proof of Concept: Provenance in a Service Oriented Architecture},
  BOOKTITLE = {Proceedings of the Fourth All Hands Meeting (AHM'05)},
  YEAR = {2005},
  MONTH = SEP,
  URL = {http://www.allhands.org.uk/2005/proceedings/papers/503.pdf},
  DISSEMINATION = {public},
  EXPORT = {yes},
  EUPUB = {yes},
  PASOA = {yes},
  ABSTRACT = {Provenance has been identified as an emerging and important concept within the Grid community
for a variety of purposes, such as verifying or tracing results. We seek to provide a concrete
conception of provenance and its possible utilisation through the process of designing and
implementing a system prototype with some specific provenance requirements. This prototype,
which is based on an idealised recipe for baking a cake, is developed within the context of a
service oriented Grid computing environment and implemented using standard Web Services
technologies. The issues surrounding the design of possible provenance system are also explored.}
}

@TECHREPORT{OGSA-Data-Scenarios:GGF,
  AUTHOR = {Stephen Davey and Ali Anjomshoaa and Mario Antonioletti and Malcolm Atkinson and Dave Berry and Ann Chervenak and Adrian Jackson and Chris Jordan and Peter Kunszt and Allen Luniewski and Luc Moreau},
  TITLE = {{OGSA} Data Scenarios v0.13},
  INSTITUTION = {Global Grid Forum},
  YEAR = {2006},
  OPTMONTH = JUN,
  URL = {https://forge.gridforum.org/sf/go/doc13605?nav=1},
  PROVENANCE = {yes},
  PASOA = {yes},
  EXPORT = {yes},
  EUPUB = {yes},
  ABSTRACT = {This document provides example scenarios of a generic nature to
                  accompany the OGSA Data Architecture document [OGSA Data
                  Arch]. It should be noted that this is not a use case
                  document generating requirements of the OGSA Data
                  Architecture. Instead this document comes from the opposite
                  direction, providing illustrations of how the components and
                  interfaces described in the OGSA Data Architecture document
                  can be put together in a selection of typical data
                  scenarios. {\em This document contains some provenance
                  related scenarios in Section 9.}}
}

@INPROCEEDINGS{Munroe:SEM06,
  AUTHOR = {Munroe, Steve and Miles, Simon and Moreau, Luc and
                  V{\'a}zquez-Salceda, Javier},
  TITLE = {{PrIMe}: A Software Engineering Methodology for Developing Provenance-Aware Applications},
  BOOKTITLE = {ACM Digital Proceedings of the Software Engineering and Middleware Workshop (SEM'06)},
  YEAR = {2006},
  URL = {http://eprints.ecs.soton.ac.uk/13062/},
  PROVENANCE = {yes},
  PASOA = {yes},
  EXPORT = {yes},
  EUPUB = {yes},
  ABSTRACT = {Provenance is a concept often used in the Art world to refer to the
           documented history of an artifact, providing information about the
           artifact's lineage and authenticity. Provenance-aware applications
           similarly allow their users to have confidence about the data they
           produce, and can enable users to make judgements relating to notions
           of trust, accountability, validation, replication and compliance of
           their data. PrIMe is a software engineering methodology for adapting
           applications to enable them to interact with a provenance middleware
           layer, thereby making them provenance-aware. Such applications allow
           users to answer questions about provenance use cases, which are
           descriptions of scenarios in which a user interacts with a system by
           performing particular functions on that system. In order to
           illustrate how PrIMe can make applications provenance-aware, an
           Organ Transplant Management example application is used.}
}

@ARTICLE{Kifor:IS06,
  AUTHOR = {Tam{\'a}s Kifor and L{\'a}szl{\'o} Z. Varga and
                  Javier V{\'a}zquez-Salceda and Sergio {\'A}lvarez
                  and Steven Willmott and Simon Miles and Luc Moreau},
  TITLE = {Provenance in Agent-mediated Healthcare Systems},
  JOURNAL = {IEEE Intelligent Systems},
  YEAR = {2006},
  MONTH = NOV # "/" # DEC,
  URL = {http://www.gridprovenance.org/publications/ProvenanceInAgentMediatedHealthcareSystems-V19.doc},
  PROVENANCE = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  EXPORT = {yes},
  ABSTRACT = {Agent-oriented cooperation techniques and standardized electronic
                  healthcare record exchange protocols can be used to combine
                  information regarding different facets of a therapy received
                  by a patient from different healthcare providers at different
                  locations. Provenance is an innovative approach to trace
                  events in complex distributed processes, dependencies between
                  such events, and associated decisions by human actors. We
                  focus on three aspects of provenance in agent-mediated
                  healthcare systems: first, we define the provenance concept
                  and show how it can be applied to agent-mediated healthcare
                  applications; second, we investigate and provide a method for
                  independent and autonomous healthcare agents to document the
                  processes they are involved in without directly interacting
                  with each other; and third, we show that this method solves
                  the privacy issues of provenance in agent-mediated healthcare
                  systems.}
}

@ARTICLE{Bose-Foster-Moreau:IPAW06,
  AUTHOR = {Raj Bose and Ian Foster and Luc Moreau},
  TITLE = {Report on the {International Provenance and Annotation Workshop} ({IPAW'06})},
  JOURNAL = {SIGMOD Record},
  VOLUME = {35},
  NUMBER = {3},
  PAGES = {51--53},
  YEAR = {2006},
  MONTH = SEP,
  URL = {http://www.sigmod.org/sigmod/record/issues/0609/sigmod-record.september2006.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  EUPUB = {yes},
  PASOA = {yes},
  SOCA = {yes},
  ABSTRACT = {The International Provenance and Annotation Workshop (IPAW'06) was
            held May 3--5, 2006 at the University of Chicago's Gleacher Center
            in downtown Chicago; it was co-chaired by Luc Moreau (University of
            Southampton) and Ian Foster (University of Chicago and Argonne
            National Laboratory) and included roughly 45 participants,
            representing about 25 organizations or projects.}
}

@ARTICLE{Miles:WEBSEM07,
  AUTHOR = {Simon Miles and Sylvia C. Wong and Weijian Fang and Paul
                  Groth and Klaus-Peter Zauner and Luc Moreau},
  TITLE = {Provenance-Based Validation of {e-Science} Experiments},
  JOURNAL = {Web Semantics: Science, Services and Agents on the World Wide Web},
  VOLUME = {5},
  NUMBER = {1},
  PAGES = {28--38},
  YEAR = {2007},
  ISSN = {1570-8268},
  DOI = {10.1016/j.websem.2006.11.003},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/WEBSEM07.pdf},
  EXPORT = {yes},
  PROVENANCE = {yes},
  PASOA = {yes},
  MYGRID = {yes},
  GRIMOIRES = {yes},
  SD = {yes},
  ABSTRACT = {E-science experiments typically involve many distributed services maintained by different organisations. After an experiment has been executed,
it is useful for a scientist to verify that the execution was performed correctly or is compatible with some existing experimental criteria or standards,
not necessarily anticipated prior to execution. Scientists may also want to review and verify experiments performed by their colleagues. There are
no existing frameworks for validating such experiments in today's e-science systems. Users therefore have to rely on error checking performed
by the services, or adopt other ad hoc methods. This paper introduces a platform-independent framework for validating workflow executions.
The validation relies on reasoning over the documented provenance of experiment results and semantic descriptions of services advertised in a
registry. This validation process ensures experiments are performed correctly, and thus results generated are meaningful. The framework is tested
in a bioinformatics application that performs protein compressibility analysis.}
}

@ARTICLE{Editorial:Challenge06,
  AUTHOR = {Luc Moreau and
                  Bertram Lud{\"a}scher and
                  Ilkay Altintas and
                  Roger S. Barga and
                  Shawn Bowers and
                  Steven Callahan and
                  Chin, Jr., George and
                  Ben Clifford and
                  Shirley Cohen and
                  Sarah Cohen-Boulakia and
                  Susan Davidson and
                  Ewa Deelman and
                  Luciano Digiampietri and
                  Ian Foster and
                  Juliana Freire and
                  James Frew and
                  Joe Futrelle and
                  Tara Gibson and
                  Yolanda Gil and
                  Carole Goble and
                  Jennifer Golbeck and
                  Paul Groth and
                  David A. Holland and
                  Sheng Jiang and
                  Jihie Kim and
                  David Koop and
                  Ales Krenek and
                  Timothy McPhillips and
                  Gaurang Mehta and
                  Simon Miles and
                  Dominic Metzger and
                  Steve Munroe and
                  Jim Myers and
                  Beth Plale and
                  Norbert Podhorszki and
                  Varun Ratnakar and
                  Emanuele Santos and
                  Carlos Scheidegger and
                  Karen Schuchardt and
                  Margo Seltzer and
                  Yogesh L. Simmhan and
                  Claudio Silva and
                  Peter Slaughter and
                  Eric Stephan and
                  Robert Stevens and
                  Daniele Turi and
                  Huy Vo and
                  Mike Wilde and
                  Jun Zhao and
                  Yong Zhao},
  TITLE = {The First {Provenance Challenge}},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2007},
  OPTVOLUME = {in this issue},
  DOI = {10.1002/cpe.1233},
  EUPUB = {yes},
  PASOA = {yes},
  EXPORT = {yes},
  SOCA = {yes},
  ABSTRACT = {The first Provenance Challenge was set up in order to provide a
            forum for the community to help understand the capabilities of
            different provenance systems and the expressiveness of their
            provenance representations.  To this end, a Functional
            Magnetic Resonance Imaging workflow was defined, which participants
            had to either simulate or run in order to produce some provenance
            representation, from which a set of identified queries had to be
            implemented and executed.  Sixteen teams responded to the
            challenge, and submitted their inputs. In this paper, we present
            the challenge workflow and queries, and summarise the participants'
            contributions.}
}

@ARTICLE{OPA:Challenge06,
  AUTHOR = {Simon Miles and Paul Groth and Steve Munroe and Sheng Jiang
                  and Thibaut Assandri and Luc Moreau},
  TITLE = {Extracting Causal Graphs from an Open Provenance Data Model},
  JOURNAL = {Concurrency and Computation: Practice and Experience},
  YEAR = {2007},
  OPTVOLUME = {in this issue},
  EUPUB = {yes},
  PASOA = {yes},
  EXPORT = {yes},
  ABSTRACT = {The open provenance architecture (OPA) approach to the challenge
            was distinct in several regards.  In particular, it is based on an
            open, well-defined data model and architecture, allowing different
            components of the challenge workflow to independently record
            documentation, and for the workflow to be executed in any
            environment.  Another noticeable feature is that we distinguish
            between the data recorded about what has occurred, \emph{process
            documentation}, and the \emph{provenance} of a data item, which is
            all that caused the data item to be as it is and is obtained as the
            result of a query over process documentation.  This distinction
            allows us to tailor the system to separately best address the
            requirements of recording and querying documentation.  Other
            notable features include the explicit recording of causal
            relationships between both events and data items, an
            interaction-based world model, intensional definition of data items
            in queries rather than relying on explicit naming mechanisms, and
            \emph{styling} of documentation to support non-functional
            application requirements such as reducing storage costs or ensuring
            privacy of data.  In this paper we describe how each of these
            features aid us in answering the challenge provenance queries.}
}

% AAMAS'07 conference paper on the provenance of data in systems of autonomous agents.
% Fields prefixed with OPT are empty placeholders; standard styles ignore them.
@INPROCEEDINGS{Miles:AAMAS07,
  AUTHOR          = {Simon Miles and Steve Munroe and Michael Luck and Luc Moreau},
  TITLE           = {Modelling the Provenance of Data in Autonomous Systems},
  BOOKTITLE       = {Proceedings of the Sixth International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS'07)},
  YEAR            = 2007,
  URL             = {http://www.ecs.soton.ac.uk/~lavm/papers/aamas07.pdf},
  PASOA           = {yes},
  EXPORT          = {yes},
  OPTCROSSREF     = {},
  OPTKEY          = {},
  OPTPAGES        = {},
  OPTEDITOR       = {},
  OPTVOLUME       = {},
  OPTNUMBER       = {},
  OPTSERIES       = {},
  OPTADDRESS      = {},
  OPTMONTH        = {},
  OPTORGANIZATION = {},
  OPTPUBLISHER    = {},
  OPTNOTE         = {},
  OPTANNOTE       = {},
  ABSTRACT        = {Determining the provenance of data, i.e. the process that
    led to that data, is vital in many disciplines. For example, in science,
    the process that produced a given result must be demonstrably rigorous for
    the result to be deemed reliable. A provenance system supports
    applications in recording adequate documentation about process executions
    to answer queries regarding provenance, and provides functionality to
    perform those queries. Several provenance systems are being developed, but
    all focus on systems in which the components are reactive, for example Web
    Services that act on the basis of a request, job submission system, etc.
    This limitation means that questions regarding the motives of autonomous
    actors, or agents, in such systems remain unanswerable in the general
    case. Such questions include: who was ultimately responsible for a given
    effect, what was their reason for initiating the process and does the
    effect of a process match what was intended to occur by those initiating
    the process? In this paper, we address this limitation by integrating two
    solutions: a generic, re-usable framework for representing the provenance
    of data in service-oriented architectures and a model for describing the
    goal-oriented delegation and engagement of agents in multi-agent systems.
    Using these solutions, we present algorithms to answer common questions
    regarding responsibility and success of a process and evaluate the
    approach with a simulated healthcare example.}
}

% AOSE'07 paper describing AgentPrIMe.
% Fields prefixed with OPT are empty placeholders; standard styles ignore them.
@INPROCEEDINGS{Miles:Methodo07,
  AUTHOR = {Simon Miles and Paul Groth and Steve Munroe and Michael Luck and
                  Luc Moreau},
  TITLE = {{AgentPrIMe: Adapting MAS Designs to Build Confidence}},
  OPTCROSSREF = {},
  OPTKEY = {},
  BOOKTITLE = {Agent-Oriented Software Engineering (AOSE'07)},
  OPTPAGES = {},
  YEAR = {2007},
  PASOA = {yes},
  EXPORT = {yes},
  URL = {http://www.ecs.soton.ac.uk/~lavm/papers/aose07.pdf},
  OPTEDITOR = {},
  OPTVOLUME = {},
  OPTNUMBER = {},
  OPTSERIES = {},
  OPTADDRESS = {},
  OPTMONTH = {},
  OPTORGANIZATION = {},
  OPTPUBLISHER = {},
  OPTNOTE = {},
  OPTANNOTE = {},
  ABSTRACT = {The products of systems cannot always be judged at face value: the
            process by which they were obtained is also important. For
            instance, the rigour of a scientific experiment, the ethics with
            which an item was manufactured and the use of services with
            particular licensing all affect how the results of those
            processes are valued. However, in systems of autonomous agents, and
            particularly those with multiple independent contributory
            organisations, the ability of agents to choose how their goals or
            responsibilities are achieved can hide such process qualities from
            users. The issue of ensuring that users are able to check these
            process qualities is a software engineering one: the developer must
            decide to ensure that adequate data is recorded regarding processes
            and safeguards implemented to ensure accuracy. In this paper, we
            describe AgentPrIMe, an adjunct to existing agent-oriented
            methodologies that allows system designs to be adapted to give
            users confidence in the results they produce. It does this by
            adaptations to the design for documentation, corroboration,
            independent storage and accountability.}
}

% Book chapter published by IOS Press.
% NOTE(review): CHAPTER holds what looks like the chapter title rather than a
% chapter number, and TITLE the book title - confirm how the style renders it.
@INBOOK{Moreau:HPC07,
  AUTHOR     = {Paul Groth and Steve Munroe and Simon Miles and Luc Moreau},
  TITLE      = {HPC and Grids in Action},
  CHAPTER    = {{Applying the Provenance Data Model to a Bioinformatics Case}},
  PUBLISHER  = {IOS Press},
  YEAR       = 2007,
  PASOA      = {yes},
  ALTEDITOR  = {},
  OPTKEY     = {},
  OPTVOLUME  = {},
  OPTNUMBER  = {},
  OPTSERIES  = {},
  OPTTYPE    = {},
  OPTADDRESS = {},
  OPTEDITION = {},
  OPTMONTH   = {},
  OPTPAGES   = {},
  OPTNOTE    = {},
  OPTANNOTE  = {}
}

% Article under submission: JOURNAL carries a status placeholder, not a venue name.
@ARTICLE{Groth:TOIT,
  AUTHOR    = {Paul Groth and Luc Moreau},
  TITLE     = {{A Shared Model for Documentation of Processes Enabling the Determination of Provenance}},
  JOURNAL   = {Submitted for Publication},
  YEAR      = 2007,
  PASOA     = {yes},
  OPTKEY    = {},
  OPTVOLUME = {},
  OPTNUMBER = {},
  OPTPAGES  = {},
  OPTMONTH  = {},
  OPTNOTE   = {},
  OPTANNOTE = {}
}

% Article under submission: JOURNAL carries a status placeholder, not a venue name.
@ARTICLE{Groth:TPDS07,
  AUTHOR    = {Paul Groth and Luc Moreau},
  TITLE     = {Recording Process Documentation for Provenance},
  JOURNAL   = {Submitted for Publication},
  YEAR      = 2007,
  PASOA     = {yes},
  OPTKEY    = {},
  OPTVOLUME = {},
  OPTNUMBER = {},
  OPTPAGES  = {},
  OPTMONTH  = {},
  OPTNOTE   = {},
  OPTANNOTE = {}
}

% Paper on capturing Pegasus workflow refinement in the PASOA provenance system.
% NOTE(review): BOOKTITLE exists only as the empty OPTBOOKTITLE placeholder, so
% no venue is recorded for this entry - fill it in once confirmed.
@INPROCEEDINGS{Miles:eScience07,
  AUTHOR          = {Simon Miles and Ewa Deelman and Paul Groth and Karan Vahi and
                     Gaurang Mehta and Luc Moreau},
  TITLE           = {Connecting Scientific Data to Scientific Experiments with
                     Provenance},
  YEAR            = 2007,
  SOCA            = {yes},
  PASOA           = {yes},
  OPTCROSSREF     = {},
  OPTKEY          = {},
  OPTBOOKTITLE    = {},
  OPTPAGES        = {},
  OPTEDITOR       = {},
  OPTVOLUME       = {},
  OPTNUMBER       = {},
  OPTSERIES       = {},
  OPTADDRESS      = {},
  OPTMONTH        = {},
  OPTORGANIZATION = {},
  OPTPUBLISHER    = {},
  OPTNOTE         = {},
  OPTANNOTE       = {},
  ABSTRACT        = {As scientific workflows, and the data they operate on, grow
    in size and complexity, the task of defining how those workflows should
    execute (which resources they should use, where those resources should be
    in preparation for processing etc.) becomes proportionally more difficult.
    While `workflow compilers', such as Pegasus, aid greatly in reducing this
    burden, a further problem arises: as specifying the details of execution
    is now automatic, a workflow's results are harder to interpret, as they
    are in part due to the specifics of execution. By automating the steps
    between the original experiment design and its results, we lose the
    connection between them, making results harder to interpret. To reconnect
    the scientific data with the original experiment, we argue that scientists
    should have access to the full provenance of their data, including not
    only parameters, input data and intermediary results, but also the
    abstract experiment, refined into a concrete execution by the `workflow
    compiler'. In this paper, we describe our preliminary work on adapting
    Pegasus to capture the process of workflow refinement in the PASOA
    provenance system.}
}

% Unsubmitted article: JOURNAL carries a status placeholder, not a venue name.
@ARTICLE{miles07determining,
  AUTHOR   = {Simon Miles and Luc Moreau},
  TITLE    = {Determining Provenance Through Scoped Queries Over Causal Graphs},
  JOURNAL  = {To Be Submitted},
  YEAR     = 2007,
  PASOA    = {yes},
  ABSTRACT = {The provenance of entities, whether electronic data or physical
    artefacts, is crucial information in practically all domains, including
    science, business and art. The increased use of software in automating our
    activities provides the opportunity to add greatly to the amount we can
    know about an entity's history and the process by which it came to be as
    it is. However, it also presents difficulties: querying for the provenance
    of an entity could potentially return detailed information stretching back
    far into the past, most of it irrelevant to the querier. In this paper, we
    define the concept of a provenance query and describe techniques that
    allow us to perform scoped provenance queries, by which a querier can
    declare in advance exactly what in the item's history is relevant to them.
    Using these techniques, a user can discover the provenance of data in the
    applications they use.}
}

% Technical report on the Open Provenance Model, University of Southampton.
@TECHREPORT{opm:2007,
  AUTHOR      = {Luc Moreau and Juliana Freire and Jim Myers and Joe Futrelle
                 and Patrick Paulson},
  TITLE       = {The Open Provenance Model},
  INSTITUTION = {University of Southampton},
  YEAR        = 2007,
  PASOA       = {yes},
  OPTKEY      = {},
  OPTTYPE     = {},
  OPTNUMBER   = {},
  OPTADDRESS  = {},
  OPTMONTH    = {},
  OPTNOTE     = {},
  OPTANNOTE   = {}
}

% ISORC 2005 paper linking provenance with multi-version fault tolerance.
@INPROCEEDINGS{Townend2005,
  AUTHOR    = {Paul Townend and Paul Groth and Jie Xu},
  TITLE     = {A Provenance-Aware Weighted Fault Tolerance Scheme for Service-Based Applications},
  BOOKTITLE = {Proc. of the 8th IEEE International Symposium on Object-oriented Real-time distributed Computing (ISORC 2005)},
  YEAR      = 2005,
  MONTH     = MAY,
  PASOA     = {yes},
  EXPORT    = {yes},
  OWNER     = {pgroth},
  ABSTRACT  = {Service-orientation has been proposed as a way of facilitating
    the development and integration of increasingly complex and heterogeneous
    system components. However, there are many new challenges to the
    dependability community in this new paradigm, such as how individual
    channels within fault-tolerant systems may invoke common services as part
    of their workflow, thus increasing the potential for common-mode failure.
    We propose a scheme that - for the first time - links the technique of
    provenance with that of multi-version fault tolerance. We implement a
    large test system and perform experiments with a single-version system, a
    traditional MVD system, and a provenance-aware MVD system, and compare
    their results. We show that for this experiment, our provenance-aware
    scheme results in a much more dependable system than either of the other
    systems tested, whilst imposing a negligible timing overhead.}
}

% MAS*BioMed'05 workshop paper on agent-oriented data curation.
% The bracketed numbers inside ABSTRACT are citation markers carried over from
% the paper text.
@INPROCEEDINGS{Miles:BIOMED05,
  AUTHOR = {Simon Miles},
  TITLE = {Agent-Oriented Data Curation in Bioinformatics},
  BOOKTITLE = {Proceedings of Workshop on Multi-Agent Systems in Medicine, Computational Biology, and Bioinformatics (MAS*BioMed'05)},
  YEAR = {2005},
  MONTH = JUL,
  OWNER = {sm},
  PASOA = {yes},
  EXPORT = {yes},
  PAGES = {157--169},
  ABSTRACT = {Bioinformatics is a fast-growing field in which biological data
        is analysed and shared using software tools. However, due to the
        field's success, the size and complexity of the data being produced is
        increasing fast. It also means that new, relatively inexperienced
        researchers are constantly being recruited. Together, these
        characteristics make it hard for organisations to ensure that work is
        being undertaken on the best available data and with the best available
        tools.  Several strands of research aim to support the bioinformatics
        community in managing the complexity of their experiments. In our own
        recent work, we have focused on recording the provenance of
        experimental results [8]. The provenance of a piece of data is the
        process that led to that data, and provenance data is the documentation
        of that process. We have determined a number of provenance-related use
        cases in bioinformatics [11] through interviews with scientists, such
        as the comparison of two experiment runs to determine why results were
        different, and justifying that the experiment was performed in a valid
        way to others. We have provided software to record and maintain
        provenance data in provenance stores.},
  URL = {http://eprints.ecs.soton.ac.uk/10853/}
}

% Journal article on agent-oriented data curation.
% The bracketed numbers inside ABSTRACT are citation markers carried over from
% the paper text.
@ARTICLE{Miles06,
  AUTHOR = {Simon Miles},
  TITLE = {Agent-Oriented Data Curation in Bioinformatics},
  JOURNAL = {International Transactions on Systems Science and Applications},
  YEAR = {2006},
  PASOA = {yes},
  EXPORT = {yes},
  PAGES = {43--50},
  VOLUME = 1,
  NUMBER = 1,
  ABSTRACT = {Bioinformatics is a fast-growing field in which biological data
        is analysed and shared using software tools. However, due to the
        field's success, the size and complexity of the data being produced is
        increasing fast. It also means that new, relatively inexperienced
        researchers are constantly being recruited. Together, these
        characteristics make it hard for organisations to ensure that work is
        being undertaken on the best available data and with the best available
        tools.  Several strands of research aim to support the bioinformatics
        community in managing the complexity of their experiments. In our own
        recent work, we have focused on recording the provenance of
        experimental results [8]. The provenance of a piece of data is the
        process that led to that data, and provenance data is the documentation
        of that process. We have determined a number of provenance-related use
        cases in bioinformatics [11] through interviews with scientists, such
        as the comparison of two experiment runs to determine why results were
        different, and justifying that the experiment was performed in a valid
        way to others. We have provided software to record and maintain
        provenance data in provenance stores.},
  URL = {http://eprints.ecs.soton.ac.uk/10853/}
}

% AHM05 paper on the FT-Grid fault-tolerance system.
% NOTE(review): this entry sets PUBLISH where sibling entries set EXPORT, and
% its URL is a relative path - confirm both are intended.
@INPROCEEDINGS{Townend2005a,
  AUTHOR = {Paul Townend and Paul Groth and Nik Looker and Jie Xu},
  TITLE = {{FT-Grid: A Fault-Tolerance System for e-Science}},
  BOOKTITLE = {Proceedings of the UK OST e-Science Fourth All Hands Meeting
                  (AHM05)},
  YEAR = {2005},
  PASOA = {yes},
  PUBLISH = {yes},
  MONTH = SEP,
  OWNER = {pgroth},
  ABSTRACT = {The size and complexity of many e-Science applications suggests
                  that they may be very prone to errors and
 failures; the cost of recovering from failures may also be high. The
 FT-Grid system, developed as part of the e-Demand project at the
 University of Leeds [1], introduces a replication-based fault tolerance
 scheme that allows faults occurring in service-based systems to be
 tolerated, thus increasing the dependability of such systems. This paper
 details the progress that has been made in the development of FT-Grid,
 including both a GUI client and also an FT-Grid web service interface.
 We show empirical evidence of the dependability benefits offered
 by FT-Grid, by performing a dependability analysis on the results
 of fault injection testing performed with the WS-FIT tool at the
 University of Durham. We then illustrate a potential problem with voting
 based fault tolerance approaches in the service-oriented paradigm -
 namely, that individual channels within fault-tolerant systems may
 invoke common services as part of their workflow, thus increasing
 the potential for common-mode failure. We propose a solution to this
 issue by using the technique of provenance to provide FT-Grid with
 topological awareness. We implement a large test system, and - with
 the use of the PreServ provenance system developed as part of the
 PASOA e-Science project at the University of Southampton - perform
 a large number of experiments which show that a provenance-aware
 FT-Grid results in a much more dependable system than any of the other
 configurations tested, whilst imposing a negligible timing overhead.},
  URL = {./mypapers/ahm-ftgrid-3.pdf}
}

% IPAW'06 paper (LNCS volume 4145) on principles for documenting the past.
@INPROCEEDINGS{Groth:IPAW06,
  AUTHOR = {Paul Groth and Simon Miles and Steve Munroe},
  TITLE = {{Principles of High Quality Documentation for Provenance: A
                  Philosophical Discussion}},
  BOOKTITLE = {International Provenance and Annotation Workshop (IPAW'06)},
  YEAR = {2006},
  EDITOR = {Luc Moreau and Ian Foster},
  VOLUME = {4145},
  SERIES = {Lecture Notes in Computer Science},
  MONTH = MAY,
  PUBLISHER = {Springer},
  URL = {http://eprints.ecs.soton.ac.uk/12568/},
  EXPORT = {yes},
  PASOA = {yes},
  EUPUB = {yes},
  PAGES = {278--286},
  OWNER = {pgroth},
  ABSTRACT = {Computer technology enables the creation of
     detailed documentation about the processes that create or affect
     entities (data, objects, etc.). Such documentation of the past can be
     used to answer various kinds of questions regarding the processes
     that led to the creation or modification of a particular entity. The
     answer to such questions are known as an entity's provenance. In this
     paper, we derive a number of principles for documenting the past,
     grounded in work from philosophy and history, which allow for provenance
     questions to be answered within a computational context. These principles
     lead us to argue that an interaction-based model is particularly
     suited for representing high quality documentation of the past.}
}

% IPAW 2006 paper on scoped provenance queries.
% NOTE(review): VOLUME is taken from the Groth:IPAW06 entry in this file, which
% cites the same workshop, series and publisher - confirm against the volume.
@INPROCEEDINGS{Miles:IPAW06,
  AUTHOR = {Simon Miles},
  TITLE = {{Electronically Querying for the Provenance of Entities}},
  BOOKTITLE = {Proceedings of the International Provenance and Annotation Workshop
	2006 (IPAW 2006)},
  YEAR = {2006},
  PASOA = {yes},
  PAGES = {184--192},
  VOLUME = {4145},
  SERIES = {Lecture Notes in Computer Science},
  PUBLISHER = {Springer},
  ABSTRACT = {The provenance of entities, whether electronic data or physical artefacts,
	is crucial information in practically all domains, including science,
	business and art. The increased use of software in automating activities
	provides the opportunity to add greatly to the amount we can know
	about an entity's history and the process by which it came to be
	as it is. However, it also presents difficulties: querying for the
	provenance of an entity could potentially return detailed information
	stretching back to the beginning of time, and most of it possibly
	irrelevant to the querier. In this paper, we define the concept
	of provenance query and describe techniques that allow us to perform
	scoped provenance queries.},
  URL = {http://eprints.ecs.soton.ac.uk/12567/}
}

% PhD thesis, University of Southampton. No URL or month is recorded for it.
@PHDTHESIS{Groth:PhdThesis07,
  AUTHOR = {Paul Groth},
  TITLE = {The Origin of Data: Enabling the Determination of Provenance in Multi-institutional Scientific Systems through the Documentation of Processes},
  SCHOOL = {University of Southampton},
  YEAR = {2007},
  EXPORT = {yes},
  PASOA = {yes}
}

<script src='https://archive-bar.soton.ac.uk/archive-bar.js'></script>
<script src='https://archive-bar.soton.ac.uk/google-analytics.js'></script>