@COMMENT{{This file has been generated by bib2bib 1.79}}

@COMMENT{{Command line: 'c:\Documents and Settings\lavm\My Documents\Luc\bib\html\bib2bib.exe' -ob pasoa.bib -oc pasoa.keys -c 'pasoa = "yes"' ../lm.bib ../team.bib}}

@COMMENT{{Review note: this file is generated output, so lasting corrections presumably belong in the input files named on the command line above.}}

@STRING{LNCS = {Lecture Notes in Computer Science}}

@INPROCEEDINGS{Groth:AHM04,
  AUTHOR    = {Paul Groth and Michael Luck and Luc Moreau},
  TITLE     = {Formalising a protocol for recording provenance in {Grids}},
  BOOKTITLE = {Proceedings of the UK OST e-Science second All Hands Meeting 2004 (AHM'04)},
  PAGECOUNT = {8},
  YEAR      = 2004,
  PASOA     = {yes},
  PIND      = {EZ~03~03~04},
  EXPORT    = {yes},
  ADDRESS   = {Nottingham, UK},
  MONTH     = SEP,
  URL       = {http://www.ecs.soton.ac.uk/~lavm/papers/ahm04-groth.pdf},
  ABSTRACT  = {Both the scientific and business communities are beginning to rely on Grids as problem-solving mechanisms. These communities also have requirements in terms of provenance. Provenance is the documentation of process and the necessity for it is apparent in fields ranging from medicine to aerospace. To support provenance capture in Grids, we have developed an implementation-independent protocol for the recording of provenance. We describe the protocol in the context of a service-oriented architecture and formalise the entities involved using an abstract state machine or a three-dimensional state transition diagram.
Using these techniques we sketch a liveness property for the system.}
}

@INPROCEEDINGS{Groth:OPODIS04,
  AUTHOR     = {Paul Groth and Michael Luck and Luc Moreau},
  TITLE      = {A protocol for recording provenance in service-oriented {Grids}},
  BOOKTITLE  = {Proceedings of the 8th International Conference on Principles of Distributed Systems (OPODIS'04)},
  URL        = {http://www.ecs.soton.ac.uk/~lavm/papers/opodis04.pdf},
  PIND       = {EZ~03~03~04},
  EXPORT     = {yes},
  PASOA      = {yes},
  PROVENANCE = {yes},
  PAGES      = {124--139},
  YEAR       = 2004,
  VOLUME     = {3544},
  SERIES     = LNCS,
  ADDRESS    = {Grenoble, France},
  MONTH      = DEC,
  ISBN       = {3-540-27324-7},
  PUBLISHER  = {Springer-Verlag},
  ABSTRACT   = {Both the scientific and business communities, which are beginning to rely on Grids as problem-solving mechanisms, have requirements in terms of provenance. The provenance of some data is the documentation of process that led to the data; its necessity is apparent in fields ranging from medicine to aerospace. To support provenance capture in Grids, we have developed an implementation-independent protocol for the recording of provenance. We describe the protocol in the context of a service-oriented architecture and formalise the entities involved using an abstract state machine or a three-dimensional state transition diagram.
Using these techniques we sketch a liveness property for the system.}
}

@ARTICLE{Miles:JOGC06,
  AUTHOR     = {Simon Miles and Paul Groth and Miguel Branco and Luc Moreau},
  TITLE      = {The requirements of recording and using provenance in {e-Science} experiments},
  JOURNAL    = {Journal of Grid Computing},
  EXPORT     = {yes},
  PROVENANCE = {yes},
  PASOA      = {yes},
  URL        = {http://eprints.ecs.soton.ac.uk/10269/},
  DOI        = {10.1007/s10723-006-9055-3},
  VOLUME     = {5},
  NUMBER     = {1},
  PAGES      = {1--25},
  YEAR       = {2007},
  ABSTRACT   = {In e-Science experiments, it is vital to record the experimental process for later use such as in interpreting results, verifying that the correct process took place or tracing where data came from. The documentation of a process that led to some data is called the provenance of that data, and a provenance architecture is the software architecture for a system that will provide the necessary functionality to record, store and use provenance data. However, there has been little principled analysis of what is actually required of a provenance architecture, so it is impossible to determine the functionality they would ideally support. In this paper, we present use cases for a provenance architecture from current experiments in biology, chemistry, physics and computer science, and analyse the use cases to determine the technical requirements of a generic, application-independent architecture. We propose an architecture that meets these requirements and evaluate a preliminary implementation by attempting to realise one of the use cases.}
}

@INPROCEEDINGS{Moreau:HPDC05,
  AUTHOR     = {Paul Groth and Simon Miles and Weijian Fang and Sylvia C.
Wong and Klaus-Peter Zauner and Luc Moreau},
  TITLE      = {Recording and Using Provenance in a Protein Compressibility Experiment},
  BOOKTITLE  = {Proceedings of the 14th IEEE International Symposium on High Performance Distributed Computing (HPDC'05)},
  PAGES      = {201--208},
  YEAR       = {2005},
  URL        = {http://www.ecs.soton.ac.uk/~lavm/papers/hpdc05.pdf},
  MONTH      = JUL,
  EXPORT     = {yes},
  PROVENANCE = {yes},
  PASOA      = {yes},
  ABSTRACT   = {Very large scale computations are now becoming routinely used as a methodology to undertake scientific research. In this context, `provenance systems' are regarded as the equivalent of the scientist's logbook for in silico experimentation: provenance captures the documentation of the process that led to some result. Using a protein compressibility analysis application, we derive a set of generic use cases for a provenance system. In order to support these, we address the following fundamental questions: what is provenance? how to record it? what is the performance impact for grid execution? what is the performance of reasoning? In doing so, we define a technology-independent notion of provenance that captures interactions between components, internal component information and grouping of interactions, so as to allow us to analyse and reason about the execution of scientific processes. In order to support persistent provenance in heterogeneous applications, we introduce a separate provenance store, in which provenance documentation can be stored, archived and queried independently of the technology used to run the application. Through a series of practical tests, we evaluate the performance impact of such a provenance system.
In summary, we demonstrate that provenance recording overhead of our prototype system remains under 10\% of execution time, and we show that the recorded information successfully supports our use cases in a performant manner.}
}

@TECHREPORT{Moreau:PROV05,
  AUTHOR      = {Luc Moreau and Liming Chen and Paul Groth and John Ibbotson and Michael Luck and Simon Miles and Omer Rana and Victor Tan and Steven Willmott and Fenglian Xu},
  TITLE       = {Logical architecture strawman for provenance systems},
  EXPORT      = {yes},
  PROVENANCE  = {yes},
  PASOA       = {yes},
  INSTITUTION = {University of Southampton},
  URL         = {http://eprints.ecs.soton.ac.uk/10796/},
  YEAR        = {2005},
  ABSTRACT    = {The purpose of this document is to propose a logical architecture for a provenance system. The logical architecture is specified independently of specific technologies. Specifically, we introduce our definition of provenance in the context of service-oriented architectures, and we identify the different roles that exist in a provenance system.}
}

@INPROCEEDINGS{Groth:AHM05,
  AUTHOR    = {Paul Groth and Simon Miles and Luc Moreau},
  TITLE     = {{PReServ}: Provenance Recording for Services},
  BOOKTITLE = {Proceedings of the UK OST e-Science Fourth All Hands Meeting 2005 (AHM'05)},
  URL       = {http://www.ecs.soton.ac.uk/~lavm/papers/Groth-AHM05.pdf},
  PIND      = {EZ~03~03~04},
  EXPORT    = {yes},
  PASOA     = {yes},
  YEAR      = 2005,
  ADDRESS   = {Nottingham, UK},
  MONTH     = SEP,
  ABSTRACT  = {The importance of understanding the process by which a result was generated in an experiment is fundamental to science. Without such information, other scientists cannot replicate, validate, or duplicate an experiment. We define provenance as the process that led to a result.
With large scale in-silico experiments, it becomes increasingly difficult for scientists to record process documentation that can be used to retrieve the provenance of a result. Provenance Recording for Services (PReServ) is a software package that allows developers to integrate process documentation recording into their applications. PReServ has been used by several applications and its performance has been benchmarked.}
}

@INPROCEEDINGS{Wong:AHM05,
  AUTHOR     = {Wong, Sylvia C. and Miles, Simon and Fang, Weijian and Groth, Paul and Moreau, Luc},
  TITLE      = {Validation of {E-Science} Experiments using a Provenance-based Approach},
  BOOKTITLE  = {Proceedings of Fourth All Hands Meeting (AHM'05)},
  YEAR       = {2005},
  ADDRESS    = {Nottingham},
  MONTH      = SEP,
  PIND       = {EZ~05~05~04},
  URL        = {http://eprints.ecs.soton.ac.uk/11063/},
  EXPORT     = {yes},
  PROVENANCE = {yes},
  PASOA      = {yes},
  MYGRID     = {yes},
  GRIMOIRES  = {yes},
  SD         = {yes},
  ABSTRACT   = {E-science experiments typically involve many distributed services maintained by different organisations. As part of the scientific process, it is important for scientists to be able to verify the correctness of their own experiments, or to review the correctness of their peers' work. There is no existing framework for validating such experiments. Users therefore have to rely on error checking performed by the services, or adopt other ad hoc methods. This paper introduces a platform independent framework for validating workflow executions. The validation relies on reasoning over the documented provenance of experiment results and semantic descriptions of services advertised in a registry. This validation process ensures experiments are performed correctly, and thus results generated are meaningful. The framework is tested in a bioinformatics application that performs protein compressibility analysis.}
}

@INPROCEEDINGS{Wong:ISWC05,
  AUTHOR     = {Wong, Sylvia C.
and Miles, Simon and Fang, Weijian and Groth, Paul and Moreau, Luc},
  TITLE      = {Provenance-based Validation of {E-Science} Experiments},
  BOOKTITLE  = {Proceedings of 4th International Semantic Web Conference (ISWC'05)},
  YEAR       = {2005},
  ADDRESS    = {Galway, Ireland},
  MONTH      = NOV,
  URL        = {http://www.ecs.soton.ac.uk/~lavm/papers/iswc05.pdf},
  EXPORT     = {yes},
  PROVENANCE = {yes},
  PASOA      = {yes},
  MYGRID     = {yes},
  GRIMOIRES  = {yes},
  SD         = {yes},
  PAGES      = {801--815},
  SERIES     = LNCS,
  VOLUME     = {3729},
  PUBLISHER  = {Springer-Verlag},
  ABSTRACT   = {E-Science experiments typically involve many distributed services maintained by different organisations. After an experiment has been executed, it is useful for a scientist to verify that the execution was performed correctly or is compatible with some existing experimental criteria or standards. Scientists may also want to review and verify experiments performed by their colleagues. There are no existing frameworks for validating such experiments in today's e-Science systems. Users therefore have to rely on error checking performed by the services, or adopt other ad hoc methods. This paper introduces a platform-independent framework for validating workflow executions. The validation relies on reasoning over the documented provenance of experiment results and semantic descriptions of services advertised in a registry. This validation process ensures experiments are performed correctly, and thus results generated are meaningful.
The framework is tested in a bioinformatics application that performs protein compressibility analysis.}
}

@PROCEEDINGS{Moreau-Foster:IPAW06,
  TITLE      = {Provenance and Annotation of Data --- {International Provenance and Annotation Workshop}, {IPAW} 2006},
  YEAR       = {2006},
  EDITOR     = {Luc Moreau and Ian Foster},
  VOLUME     = {4145},
  SERIES     = LNCS,
  EUPUB      = {yes},
  EXPORT     = {yes},
  PROVENANCE = {yes},
  PASOA      = {yes},
  SOCA       = {yes},
  MONTH      = MAY,
  PUBLISHER  = {Springer-Verlag},
  ISBN       = {3-540-46302-X},
  URL        = {http://www.springer.com/uk/home/generic/search/results?SGWID=3-40109-22-173681711-0},
  ABSTRACT   = {The International Provenance and Annotation Workshop (IPAW 2006) was a follow-up to workshops in Chicago in October 2002 and in Edinburgh in December 2003. It brought together computer scientists and domain scientists with a common interest in issues of data provenance, process documentation, data derivation, and data annotation. IPAW 2006 was held on May 3-5, 2006 at the University of Chicago's Gleacher Center in downtown Chicago and was attended by roughly 45 participants.}
}

@INPROCEEDINGS{Branco:IPAW06,
  AUTHOR     = {Miguel Branco and Luc Moreau},
  TITLE      = {Enabling provenance on large scale {e-Science} applications},
  BOOKTITLE  = {Proceedings of the International Provenance and Annotation Workshop (IPAW'06)},
  PAGES      = {55--63},
  YEAR       = {2006},
  EXPORT     = {yes},
  PROVENANCE = {yes},
  PASOA      = {yes},
  VOLUME     = {4145},
  SERIES     = LNCS,
  ADDRESS    = {Chicago, Illinois},
  ABSTRACT   = {Large-scale e-Science experiments present unprecedented data handling requirements with their multi-petabyte data storages.
Complex software applications, such as the ATLAS High Energy Physics experiment at CERN, run throughout Grid computing sites around the world in a distributed environment, with scientists performing concurrent analysis on data and producing new data products shared among the collaboration. In this paper, we introduce a multi-phase infrastructure to achieve data provenance for an e-Science experiment. We propose an infrastructure to integrate provenance onto an existing legacy application with strong emphasis on scalability and explore the relationship between provenance and metadata introducing a model where data provenance is made available as metadata through a separate reasoning phase.},
  PUBLISHER  = {Springer-Verlag}
}

@INPROCEEDINGS{Tan:IPAW06,
  AUTHOR     = {Victor Tan and Paul Groth and Simon Miles and Sheng Jiang and Steve Munroe and Sofia Tsasakou and Luc Moreau},
  TITLE      = {Security Issues in a {SOA}-based Provenance System},
  BOOKTITLE  = {Proceedings of the International Provenance and Annotation Workshop (IPAW'06)},
  PAGES      = {203--211},
  YEAR       = {2006},
  EUPUB      = {yes},
  VOLUME     = {4145},
  SERIES     = LNCS,
  EXPORT     = {yes},
  PROVENANCE = {yes},
  PASOA      = {yes},
  ADDRESS    = {Chicago, Illinois},
  PUBLISHER  = {Springer-Verlag},
  URL        = {http://eprints.ecs.soton.ac.uk/12569/},
  ABSTRACT   = {Recent work has begun exploring the characterization and utilization of provenance in systems based on the Service Oriented Architecture (such as Web Services and Grid based environments). One of the salient issues related to provenance use within any given system is its security. Provenance presents some unique security requirements of its own, which are additionally dependent on the architectural and environmental context that a provenance system operates in.
We discuss the security considerations pertaining to a Service Oriented Architecture based provenance system. Concurrently, we outline possible approaches to address them.}
}

@ARTICLE{Moreau:CACM07,
  AUTHOR   = {Luc Moreau and Paul Groth and Simon Miles and Javier Vazquez and John Ibbotson and Sheng Jiang and Steve Munroe and Omer Rana and Andreas Schreiber and Victor Tan and Laszlo Varga},
  TITLE    = {The Provenance of Electronic Data},
  JOURNAL  = {Communications of the ACM},
  YEAR     = {2007},
  URL      = {http://www.ecs.soton.ac.uk/~lavm/papers/cacm06.pdf},
  EUPUB    = {yes},
  PASOA    = {yes},
  ABSTRACT = {In the study of fine art, provenance refers to the documented history of some art object. Given that documented history, the object attains an authority that allows scholars to appreciate its importance with respect to other works, whereas, in the absence of such history, the object may be treated with some skepticism. Our IT landscape is evolving as illustrated by applications that are open, composed dynamically, and that discover results and services on the fly. Against this challenging background, it is crucial for users to be able to have confidence in the results produced by such applications. If the provenance of data produced by computer systems could be determined as it can for some works of art, then users, in their daily applications, would be able to interpret and judge the quality of data better.
We introduce a provenance lifecycle and advocate an open approach based on two key principles to support a notion of provenance in computer systems: documentation of execution and user-tailored provenance queries.}
}

@INPROCEEDINGS{Chen:AHM05,
  AUTHOR        = {Liming Chen and Victor Tan and Fenglian Xu and Alexis Biller and Paul Groth and Simon Miles and John Ibbotson and Michael Luck and Luc Moreau},
  TITLE         = {A Proof of Concept: Provenance in a Service Oriented Architecture},
  BOOKTITLE     = {Proceedings of the Fourth All Hands Meeting (AHM)},
  EXPORT        = {yes},
  YEAR          = {2005},
  MONTH         = SEP,
  DISSEMINATION = {public},
  URL           = {http://www.allhands.org.uk/2005/proceedings/papers/503.pdf},
  ABSTRACT      = {Provenance has been identified as an emerging and important concept within the Grid community for a variety of purposes, such as verifying or tracing results. We seek to provide a concrete conception of provenance and its possible utilisation through the process of designing and implementing a system prototype with some specific provenance requirements. This prototype, which is based on an idealised recipe for baking a cake, is developed within the context of a service oriented Grid computing environment and implemented using standard Web Services technologies.
The issues surrounding the design of possible provenance system are also explored.},
  EUPUB         = {yes},
  PASOA         = {yes}
}

@TECHREPORT{OGSA-Data-Scenarios:GGF,
  AUTHOR      = {Stephen Davey and Ali Anjomshoaa and Mario Antonioletti and Malcolm Atkinson and Dave Berry and Ann Chervenak and Adrian Jackson and Chris Jordan and Peter Kunszt and Allen Luniewski and Luc Moreau},
  TITLE       = {{OGSA} Data Scenarios v0.13},
  INSTITUTION = {Global Grid Forum},
  YEAR        = {2006},
  OPTMONTH    = JUN,
  PROVENANCE  = {yes},
  PASOA       = {yes},
  EXPORT      = {yes},
  EUPUB       = {yes},
  ABSTRACT    = {This document provides example scenarios of a generic nature to accompany the OGSA Data Architecture document [OGSA Data Arch]. It should be noted that this is not a use case document generating requirements of the OGSA Data Architecture. Instead this document comes from the opposite direction, providing illustrations of how the components and interfaces described in the OGSA Data Architecture document can be put together in a selection of typical data scenarios. \emph{This document contains some provenance related scenarios in Section 9.}},
  URL         = {https://forge.gridforum.org/sf/go/doc13605?nav=1}
}

@INPROCEEDINGS{Munroe:SEM06,
  AUTHOR     = {Munroe, Steve and Miles, Simon and Moreau, Luc and V{\'a}zquez-Salceda, Javier},
  TITLE      = {{PrIMe}: A Software Engineering Methodology for Developing Provenance-Aware Applications},
  BOOKTITLE  = {ACM Digital Proceedings of the Software Engineering and Middleware Workshop (SEM'06)},
  YEAR       = {2006},
  PROVENANCE = {yes},
  PASOA      = {yes},
  EXPORT     = {yes},
  EUPUB      = {yes},
  URL        = {http://eprints.ecs.soton.ac.uk/13062/},
  ABSTRACT   = {Provenance is a concept often used in the Art world to refer to the documented history of an artifact, providing information about the artifact's lineage and authenticity.
Provenance-aware applications similarly allow their users to have confidence about the data they produce, and can enable users to make judgements relating to notions of trust, accountability, validation, replication and compliance of their data. PrIMe is a software engineering methodology for adapting applications to enable them to interact with a provenance middleware layer, thereby making them provenance-aware. Such applications allow users to answer questions about provenance use cases, which are descriptions of scenarios in which a user interacts with a system by performing particular functions on that system. In order to illustrate how PrIMe can make applications provenance-aware, an Organ Transplant Management example application is used.}
}

@ARTICLE{Kifor:IS06,
  AUTHOR     = {Tam{\'a}s Kifor and L{\'a}szl{\'o} Z. Varga and Javier V{\'a}zquez-Salceda and Sergio {\'A}lvarez and Steven Willmott and Simon Miles and Luc Moreau},
  TITLE      = {Provenance in Agent-mediated Healthcare Systems},
  JOURNAL    = {IEEE Intelligent Systems},
  YEAR       = {2006},
  MONTH      = NOV # "/" # DEC,
  URL        = {http://www.gridprovenance.org/publications/ProvenanceInAgentMediatedHealthcareSystems-V19.doc},
  PROVENANCE = {yes},
  PASOA      = {yes},
  EUPUB      = {yes},
  EXPORT     = {yes},
  ABSTRACT   = {Agent-oriented cooperation techniques and standardized electronic healthcare record exchange protocols can be used to combine information regarding different facets of a therapy received by a patient from different healthcare providers at different locations. Provenance is an innovative approach to trace events in complex distributed processes, dependencies between such events, and associated decisions by human actors.
We focus on three aspects of provenance in agent-mediated healthcare systems: first, we define the provenance concept and show how it can be applied to agent-mediated healthcare applications; second, we investigate and provide a method for independent and autonomous healthcare agents to document the processes they are involved in without directly interacting with each other; and third, we show that this method solves the privacy issues of provenance in agent-mediated healthcare systems.}
}

@ARTICLE{Bose-Foster-Moreau:IPAW06,
  AUTHOR     = {Raj Bose and Ian Foster and Luc Moreau},
  TITLE      = {Report on the {International Provenance and Annotation Workshop} ({IPAW'06})},
  JOURNAL    = {{SIGMOD} Record},
  YEAR       = {2006},
  VOLUME     = {35},
  NUMBER     = {3},
  PAGES      = {51--53},
  MONTH      = SEP,
  EXPORT     = {yes},
  PROVENANCE = {yes},
  EUPUB      = {yes},
  PASOA      = {yes},
  SOCA       = {yes},
  URL        = {http://www.sigmod.org/sigmod/record/issues/0609/sigmod-record.september2006.pdf},
  ABSTRACT   = {The International Provenance and Annotation Workshop (IPAW'06) was held May 3-5, 2006 at the University of Chicago's Gleacher Center in downtown Chicago; it was co-chaired by Luc Moreau (University of Southampton) and Ian Foster (University of Chicago and Argonne National Laboratory) and included roughly 45 participants, representing about 25 organizations or projects.}
}

@ARTICLE{Miles:WEBSEM07,
  AUTHOR     = {Simon Miles and Sylvia C.
Wong and Weijian Fang and Paul Groth and Klaus-Peter Zauner and Luc Moreau},
  TITLE      = {Provenance-Based Validation of {e-Science} Experiments},
  JOURNAL    = {Web Semantics: Science, Services and Agents on the World Wide Web},
  YEAR       = {2007},
  URL        = {http://www.ecs.soton.ac.uk/~lavm/papers/WEBSEM07.pdf},
  EXPORT     = {yes},
  PROVENANCE = {yes},
  PASOA      = {yes},
  MYGRID     = {yes},
  GRIMOIRES  = {yes},
  SD         = {yes},
  VOLUME     = {5},
  NUMBER     = {1},
  ISSN       = {1570-8268},
  PAGES      = {28--38},
  DOI        = {10.1016/j.websem.2006.11.003},
  ABSTRACT   = {E-science experiments typically involve many distributed services maintained by different organisations. After an experiment has been executed, it is useful for a scientist to verify that the execution was performed correctly or is compatible with some existing experimental criteria or standards, not necessarily anticipated prior to execution. Scientists may also want to review and verify experiments performed by their colleagues. There are no existing frameworks for validating such experiments in today's e-science systems. Users therefore have to rely on error checking performed by the services, or adopt other ad hoc methods. This paper introduces a platform-independent framework for validating workflow executions. The validation relies on reasoning over the documented provenance of experiment results and semantic descriptions of services advertised in a registry. This validation process ensures experiments are performed correctly, and thus results generated are meaningful. The framework is tested in a bioinformatics application that performs protein compressibility analysis.}
}

@ARTICLE{Editorial:Challenge06,
  AUTHOR    = {Luc Moreau and Bertram Lud{\"a}scher and Ilkay Altintas and Roger S.
Barga and Shawn Bowers and Steven Callahan and Chin, Jr., George and Ben Clifford and
               Shirley Cohen and Sarah Cohen-Boulakia and Susan Davidson and Ewa Deelman and
               Luciano Digiampietri and Ian Foster and Juliana Freire and James Frew and
               Joe Futrelle and Tara Gibson and Yolanda Gil and Carole Goble and
               Jennifer Golbeck and Paul Groth and David A. Holland and Sheng Jiang and
               Jihie Kim and David Koop and Ales Krenek and Timothy McPhillips and
               Gaurang Mehta and Simon Miles and Dominic Metzger and Steve Munroe and
               Jim Myers and Beth Plale and Norbert Podhorszki and Varun Ratnakar and
               Emanuele Santos and Carlos Scheidegger and Karen Schuchardt and Margo Seltzer and
               Yogesh L. Simmhan and Claudio Silva and Peter Slaughter and Eric Stephan and
               Robert Stevens and Daniele Turi and Huy Vo and Mike Wilde and
               Jun Zhao and Yong Zhao},
  TITLE     = {The First {Provenance Challenge}},
  JOURNAL   = {Concurrency and Computation: Practice and Experience},
  YEAR      = {2007},
  EUPUB     = {yes},
  PASOA     = {yes},
  EXPORT    = {yes},
  OPTVOLUME = {in this issue},
  DOI       = {10.1002/cpe.1233},
  SOCA      = {yes},
  ABSTRACT  = {The first Provenance Challenge was set up in order to provide a forum for the community to help understand the capabilities of different provenance systems and the expressiveness of their provenance representations. To this end, a Functional Magnetic Resonance Imaging workflow was defined, which participants had to either simulate or run in order to produce some provenance representation, from which a set of identified queries had to be implemented and executed. Sixteen teams responded to the challenge, and submitted their inputs.
In this paper, we present the challenge workflow and queries, and summarise the participants' contributions.}
}

@ARTICLE{OPA:Challenge06,
  AUTHOR    = {Simon Miles and Paul Groth and Steve Munroe and Sheng Jiang and Thibaut Assandri and Luc Moreau},
  TITLE     = {Extracting Causal Graphs from an Open Provenance Data Model},
  JOURNAL   = {Concurrency and Computation: Practice and Experience},
  YEAR      = {2007},
  EUPUB     = {yes},
  PASOA     = {yes},
  EXPORT    = {yes},
  OPTVOLUME = {in this issue},
  ABSTRACT  = {The open provenance architecture (OPA) approach to the challenge was distinct in several regards. In particular, it is based on an open, well-defined data model and architecture, allowing different components of the challenge workflow to independently record documentation, and for the workflow to be executed in any environment. Another noticeable feature is that we distinguish between the data recorded about what has occurred, \emph{process documentation}, and the \emph{provenance} of a data item, which is all that caused the data item to be as it is and is obtained as the result of a query over process documentation. This distinction allows us to tailor the system to separately best address the requirements of recording and querying documentation. Other notable features include the explicit recording of causal relationships between both events and data items, an interaction-based world model, intensional definition of data items in queries rather than relying on explicit naming mechanisms, and \emph{styling} of documentation to support non-functional application requirements such as reducing storage costs or ensuring privacy of data.
In this paper we describe how each of these features aids us in answering the challenge provenance queries.}
}

@INPROCEEDINGS{Miles:AAMAS07,
  AUTHOR    = {Simon Miles and Steve Munroe and Michael Luck and Luc Moreau},
  TITLE     = {Modelling the Provenance of Data in Autonomous Systems},
  BOOKTITLE = {Proceedings of the Sixth International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS'07)},
  YEAR      = {2007},
  PASOA     = {yes},
  EXPORT    = {yes},
  URL       = {http://www.ecs.soton.ac.uk/~lavm/papers/aamas07.pdf},
  ABSTRACT  = {Determining the provenance of data, i.e. the process that led to that data, is vital in many disciplines. For example, in science, the process that produced a given result must be demonstrably rigorous for the result to be deemed reliable. A provenance system supports applications in recording adequate documentation about process executions to answer queries regarding provenance, and provides functionality to perform those queries. Several provenance systems are being developed, but all focus on systems in which the components are reactive, for example Web Services that act on the basis of a request, job submission system, etc. This limitation means that questions regarding the motives of autonomous actors, or agents, in such systems remain unanswerable in the general case. Such questions include: who was ultimately responsible for a given effect, what was their reason for initiating the process and does the effect of a process match what was intended to occur by those initiating the process?
In this paper, we address this limitation by integrating two solutions: a generic, re-usable framework for representing the provenance of data in service-oriented architectures and a model for describing the goal-oriented delegation and engagement of agents in multi-agent systems. Using these solutions, we present algorithms to answer common questions regarding responsibility and success of a process and evaluate the approach with a simulated healthcare example.}
}

@INPROCEEDINGS{Miles:Methodo07,
  AUTHOR    = {Simon Miles and Paul Groth and Steve Munroe and Michael Luck and Luc Moreau},
  TITLE     = {{AgentPrIMe}: Adapting {MAS} Designs to Build Confidence},
  BOOKTITLE = {Agent-Oriented Software Engineering (AOSE'07)},
  YEAR      = {2007},
  PASOA     = {yes},
  EXPORT    = {yes},
  URL       = {http://www.ecs.soton.ac.uk/~lavm/papers/aose07.pdf},
  ABSTRACT  = {The products of systems cannot always be judged at face value: the process by which they were obtained is also important. For instance, the rigour of a scientific experiment, the ethics with which an item was manufactured and the use of services with particular licensing all affect how the results of those processes are valued. However, in systems of autonomous agents, and particularly those with multiple independent contributory organisations, the ability of agents to choose how their goals or responsibilities are achieved can hide such process qualities from users. The issue of ensuring that users are able to check these process qualities is a software engineering one: the developer must decide to ensure that adequate data is recorded regarding processes and safeguards implemented to ensure accuracy.
In this paper, we describe AgentPrIMe, an adjunct to existing agent-oriented methodologies that allows system designs to be adapted to give users con dence in the results they produce. It does this by adaptations to the design for documenta- tion, corroboration, independent storage and accountability.} } @INBOOK{Moreau:HPC07, AUTHOR = {Paul Groth and Steve Munroe and Simon Miles and Luc Moreau}, ALTEDITOR = {}, TITLE = {HPC and Grids in Action}, CHAPTER = {{Applying the Provenance Data Model to a Bioinformatics Case}}, PUBLISHER = {IOS Press}, YEAR = {2007}, PASOA = {yes}, OPTKEY = {}, OPTVOLUME = {}, OPTNUMBER = {}, OPTSERIES = {}, OPTTYPE = {}, OPTADDRESS = {}, OPTEDITION = {}, OPTMONTH = {}, OPTPAGES = {}, OPTNOTE = {}, OPTANNOTE = {} } @ARTICLE{Groth:TOIT, AUTHOR = {Paul Groth and Luc Moreau}, TITLE = {{A Shared Model for Documentation of Processes Enabling the Determination of Provenance}}, PASOA = {yes}, JOURNAL = {Submitted for Publication}, YEAR = {2007}, OPTKEY = {}, OPTVOLUME = {}, OPTNUMBER = {}, OPTPAGES = {}, OPTMONTH = {}, OPTNOTE = {}, OPTANNOTE = {} } @ARTICLE{Groth:TPDS07, AUTHOR = {Paul Groth and Luc Moreau}, TITLE = {Recording Process Documentation for Provenance}, JOURNAL = {Submitted for Publication}, YEAR = {2007}, PASOA = {yes}, OPTKEY = {}, OPTVOLUME = {}, OPTNUMBER = {}, OPTPAGES = {}, OPTMONTH = {}, OPTNOTE = {}, OPTANNOTE = {} } @INPROCEEDINGS{Miles:eScience07, AUTHOR = {Simon Miles and Ewa Deelman and Paul Groth and Karan Vahi and Gaurang Mehta and Luc Moreau}, TITLE = {Connecting Scientific Data to Scientific Experiments with Provenance}, OPTCROSSREF = {}, OPTKEY = {}, OPTBOOKTITLE = {}, OPTPAGES = {}, YEAR = {2007}, OPTEDITOR = {}, OPTVOLUME = {}, OPTNUMBER = {}, OPTSERIES = {}, OPTADDRESS = {}, OPTMONTH = {}, OPTORGANIZATION = {}, OPTPUBLISHER = {}, OPTNOTE = {}, OPTANNOTE = {}, SOCA = {yes}, PASOA = {yes}, ABSTRACT = {As scientific workflows, and the data they operate on, grow in size and complexity, the task of defining how those 
workflows should execute (which resources they should use, where those resources should be in preparation for processing etc.) becomes proportionally more difficult. While `workflow compilers', such as Pegasus, aid greatly in reducing this burden, a further problem arises: as specifying the details of execution is now automatic, a workflow's results are harder to interpret, as they are in part due to the specifics of execution. By automating the steps between the original experiment design and its results, we lose the connection between them, making results harder to interpret. To reconnect the scientific data with the original experiment, we argue that scientists should have access to the full provenance of their data, including not only parameters, input data and intermediary results, but also the abstract experiment, refined into a concrete execution by the `workflow compiler'. In this paper, we describe our preliminary work on adapting Pegasus to capture the process of workflow refinement in the PASOA provenance system.} } @ARTICLE{miles07determining, AUTHOR = {Simon Miles and Luc Moreau}, TITLE = {Determining Provenance Through Scoped Queries Over Causal Graphs}, JOURNAL = {To Be Submitted}, PASOA = {yes}, YEAR = {2007}, ABSTRACT = {The provenance of entities, whether electronic data or physical artefacts, is crucial information in practically all domains, including science, business and art. The increased use of software in automating our activities provides the opportunity to add greatly to the amount we can know about an entity's history and the process by which it came to be as it is. However, it also presents difficulties: querying for the provenance of an entity could potentially return detailed information stretching back far into the past, most of it irrelevant to the querier. 
In this paper, we define the concept of a provenance query and describe techniques that allow us to perform scoped provenance queries, by which a querier can declare in advance exactly what in the item's history is relevant to them. Using these techniques, a user can discover the provenance of data in the applications they use.} } @TECHREPORT{opm:2007, AUTHOR = {Luc Moreau and Juliana Freire and Jim Myers and Joe Futrelle and Patrick Paulson}, TITLE = {The Open Provenance Model}, INSTITUTION = {University of Southampton}, YEAR = {2007}, PASOA = {yes}, OPTKEY = {}, OPTTYPE = {}, OPTNUMBER = {}, OPTADDRESS = {}, OPTMONTH = {}, OPTNOTE = {}, OPTANNOTE = {} } @INPROCEEDINGS{Townend2005, AUTHOR = {Paul Townend and Paul Groth and Jie Xu}, TITLE = {A Provenance-Aware Weighted Fault Tolerance Scheme for Service-Based Applications}, BOOKTITLE = {Proc. of the 8th IEEE International Symposium on Object-oriented Real-time distributed Computing (ISORC 2005)}, YEAR = {2005}, MONTH = MAY, PASOA = {yes}, EXPORT = {yes}, ABSTRACT = {Service-orientation has been proposed as a way of facilitating the development and integration of increasingly complex and heterogeneous system components. However, there are many new challenges to the dependability community in this new paradigm, such as how individual channels within fault-tolerant systems may invoke common services as part of their workflow, thus increasing the potential for common-mode failure. We propose a scheme that - for the first time - links the technique of provenance with that of multi-version fault tolerance. We implement a large test system and perform experiments with a single-version system, a traditional MVD system, and a provenance-aware MVD system, and compare their results. We show that for this experiment, our provenance-aware scheme results in a much more dependable system than either of the other systems tested, whilst imposing a negligible timing overhead. 
}, OWNER = {pgroth} } @INPROCEEDINGS{Miles:BIOMED05, AUTHOR = {Simon Miles}, TITLE = {Agent-Oriented Data Curation in Bioinformatics}, BOOKTITLE = {Proceedings of Workshop on Multi-Agent Systems in Medicine, Computational Biology, and Bioinformatics (MAS*BioMed'05)}, YEAR = {2005}, MONTH = JUL, OWNER = {sm}, PASOA = {yes}, EXPORT = {yes}, PAGES = {157-169}, ABSTRACT = {Bioinformatics is a fast-growing field in which biological data is analysed and shared using software tools. However, due to the field’s success, the size and complexity of the data being produced is increasing fast. It also means that new, relatively inexperienced researchers are constantly being recruited. Together, these characteristics make it hard for organisations to ensure that work is being undertaken on the best available data and with the best available tools. Several strands of research aim to support the bioinformatics community in managing the complexity of their experiments. In our own recent work, we have focused on recording the provenance of experimental results [8]. The provenance of a piece of data is the process that led to that data, and provenance data is the documentation of that process. We have determined a number of provenancerelated use cases in bioinformatics [11] through interviews with scientists, such as the comparison of two experiment runs to determine why results were different, and justifying that the experiment was performed in a valid way to others. 
We have provided software to record and maintain provenance data in provenance stores.}, PASOA = {yes}, URL = {http://eprints.ecs.soton.ac.uk/10853/} } @ARTICLE{Miles06, AUTHOR = {Simon Miles}, TITLE = {Agent-Oriented Data Curation in Bioinformatics}, JOURNAL = {International Transactions on Systems Science and Applications}, YEAR = {2006}, PASOA = {yes}, EXPORT = {yes}, PAGES = {43--50}, VOLUME = 1, NUMBER = 1, ABSTRACT = {Bioinformatics is a fast-growing field in which biological data is analysed and shared using software tools. However, due to the field’s success, the size and complexity of the data being produced is increasing fast. It also means that new, relatively inexperienced researchers are constantly being recruited. Together, these characteristics make it hard for organisations to ensure that work is being undertaken on the best available data and with the best available tools. Several strands of research aim to support the bioinformatics community in managing the complexity of their experiments. In our own recent work, we have focused on recording the provenance of experimental results [8]. The provenance of a piece of data is the process that led to that data, and provenance data is the documentation of that process. We have determined a number of provenancerelated use cases in bioinformatics [11] through interviews with scientists, such as the comparison of two experiment runs to determine why results were different, and justifying that the experiment was performed in a valid way to others. 
We have provided software to record and maintain provenance data in provenance stores.}, PASOA = {yes}, URL = {http://eprints.ecs.soton.ac.uk/10853/} } @INPROCEEDINGS{Townend2005a, AUTHOR = {Paul Townend and Paul Groth and Nik Looker and Jie Xu}, TITLE = {{FT-Grid: A Fault-Tolerance System for e-Science}}, BOOKTITLE = {Proceedings of the UK OST e-Science Fourth All Hands Meeting (AHM05)}, YEAR = {2005}, PASOA = {yes}, PUBLISH = {yes}, MONTH = {September}, OWNER = {pgroth}, ABSTRACT = {The size and complexity of many e-Science applications suggests that they may be very prone to errors and failures; the cost of recovering from failures may also be high. The FT-Grid system, developed as part of the e-Demand project at the University of Leeds [1], introduces a replication-based fault tolerance scheme that allows faults occurring in service-based systems to be tolerated, thus increasing the dependability of such systems. This paper details the progress that has been made in the development of FT-Grid, including both a GUI client and also an FT-Grid web service interface. We show empirical evidence of the dependability benefits offered by FT-Grid, by performing a dependability analysis on the results of fault injection testing performed with the WS-FIT tool at the University of Durham. We then illustrate a potential problem with voting based fault tolerance approaches in the service-oriented paradigm . namely, that individual channels within fault-tolerant systems may invoke common services as part of their workflow, thus increasing the potential for commonmode failure. We propose a solution to this issue by using the technique of provenance to provide FT-Grid with topological awareness. 
We implement a large test system, and - with the use of the PreServ provenance system developed as part of the PASOA e-Science project at the University of Southampton - perform a large number of experiments which show that a provenance-aware FTGrid results in a much more dependable system than any of the other configurations tested, whilst imposing a negligible timing overhead.}, URL = {./mypapers/ahm-ftgrid-3.pdf} } @INPROCEEDINGS{Groth:IPAW06, AUTHOR = {Paul Groth and Simon Miles and Steve Munroe}, TITLE = {{Principles of High Quality Documentation for Provenance: A Philosophical Discussion}}, BOOKTITLE = {International Provenance and Annotation Workshop (IPAW'06),}, YEAR = {2006}, EDITOR = {Luc Moreau and Ian Foster}, VOLUME = {4145}, SERIES = {Lecture Notes in Computer Science}, MONTH = {May}, PUBLISHER = {Springer}, URL = {http://eprints.ecs.soton.ac.uk/12568/}, EXPORT = {yes}, PASOA = {yes}, EUPUB = {yes}, PAGES = {278--286}, OWNER = {pgroth}, ABSTRACT = {Computer technology enables the creation of detailed documentation about the processes that create or affect entities (data, objects, etc.). Such documentation of the past can be used to answer various kinds of questions regarding the processes that led to the creation or modification of a particular entity. The answer to such questions are known as an entity’s provenance. In this paper, we derive a number of principles for documenting the past, grounded in work from philosophy and history, which allow for provenance questions to be answered within a computational context. These principles lead us to argue that an interaction-based model is particularly suited for representing high quality documentation of the past. 
} } @INPROCEEDINGS{Miles:IPAW06, AUTHOR = {Simon Miles}, TITLE = {{Electronically Querying for the Provenance of Entities}}, BOOKTITLE = {Proceedings of the International Provenance and Annotation Workshop 2006 (IPAW 2006)}, YEAR = {2006}, PASOA = {yes}, PAGES = {184-192}, SERIES = {Lecture Notes in Computer Science}, PUBLISHER = {Springer}, ABSTRACT = {The provenance of entities, whether electronic data or physical artefacts, is crucial information in practically all domains, including science, business and art. The increased use of software in automating activities provides the opportunity to add greatly to the amount we can know about an entity’s history and the process by which it came to be as it is. However, it also presents difficulties: querying for the provenance of an entity could potentially return detailed information stretching back to the beginning of time, and most of it possibly irrelevant to the querier. In this paper, we define the concept of provenance query and describe techniques that allow us to perform scoped provenance queries.}, URL = {http://eprints.ecs.soton.ac.uk/12567/} } @PHDTHESIS{Groth:PhdThesis07, AUTHOR = {Paul Groth}, EXPORT = {yes}, PASOA = {yes}, TITLE = {The Origin of Data: Enabling the Determination of Provenance in Multi-institutional Scientific Systems through the Documentation of Processes}, SCHOOL = {University of Southampton}, URL = {}, MONTH = {}, YEAR = {2007} }