To generate OPM output, we combined information from the execution log with the workflow specification and the module registry. We have included all pieces of this output as xml files including the original vistrail. The OPM output is generated from a run of version 121 of the workflow, and the wf file contains that version.
We also have versions of the log and OPM translation when the IsMatchTableRowCount method returned False:
Note that the values of processes and artifacts are VisTrails entities. We include 3 different accounts of the workflow in our OPM output. The first shows a Map function that processes a ListOfElements in order to loop over all of the CSVFileEntries; the second shows each iteration where each iteration is a Group (subworkflow) of modules; and the third shows the iterations with the interior steps.
![]() [PNG] [PDF] | ![]() [PNG] [PDF] | ![]() [PNG] [PDF] |
The following graphs show the same workflow, but halting at the IsMatchTableRowCount check for the second table. Again, we have three accounts:
![]() [PNG] [PDF] | ![]() [PNG] [PDF] | ![]() [PNG] [PDF] |
We used XQuery to query the XML specifications. Upstream/downstream queries were implemented using recursive functions as follows:
(: Shared helpers for the upstream/downstream provenance queries.

   Each function takes the OPM document $d and a set of start nodes, and
   returns the transitive closure of one dependency relation, computed by
   following opm:used and opm:wasGeneratedBy edges through their
   cause/effect @id references.

   The start-node parameter accepts zero or more elements, so the result of
   a path or FLWOR expression can be passed directly, whether it is empty,
   a single node or several nodes.

   The opm prefix must be bound by a namespace declaration placed earlier
   in the query prolog than these function declarations.

   NOTE(review): the recursion has no visited set; it assumes the
   provenance graph is acyclic and would not terminate on a cycle. :)

(: derivedFrom* - artefacts upstream.
   $d  the OPM document (or any node containing the OPM graph)
   $a  the artifact(s) to start from
   Returns every artifact that $a was directly or indirectly derived from,
   without duplicates and in document order. :)
declare function local:derivedFrom($d, $a as element(opm:artifact)*)
  as element(opm:artifact)*
{
  (: processes that generated the start artifact(s) :)
  let $generators :=
    $d//opm:process[@id = $d//opm:wasGeneratedBy[opm:effect/@id = $a/@id]/opm:cause/@id]
  (: "used" edges whose effect is one of those processes :)
  let $inputs := $d//opm:used[opm:effect/@id = $generators/@id]
  (: direct parents: the artifacts those edges name as their cause :)
  let $parents := $d//opm:artifact[@id = $inputs/opm:cause/@id]
  (: recurse once per distinct parent; an empty $parents ends the recursion :)
  let $ancestors :=
    for $parent in $parents
    return local:derivedFrom($d, $parent)
  return $parents | $ancestors
};

(: triggeredBy* - processes upstream.
   $d  the OPM document (or any node containing the OPM graph)
   $p  the process(es) to start from
   Returns every process that directly or indirectly produced an artifact
   consumed by $p, without duplicates and in document order. :)
declare function local:triggeredBy($d, $p as element(opm:process)*)
  as element(opm:process)*
{
  (: artifacts used by the start process(es) :)
  let $consumed :=
    $d//opm:artifact[@id = $d//opm:used[opm:effect/@id = $p/@id]/opm:cause/@id]
  (: "wasGeneratedBy" edges whose effect is one of those artifacts :)
  let $origins := $d//opm:wasGeneratedBy[opm:effect/@id = $consumed/@id]
  (: direct predecessors: the processes those edges name as their cause :)
  let $parents := $d//opm:process[@id = $origins/opm:cause/@id]
  let $ancestors :=
    for $parent in $parents
    return local:triggeredBy($d, $parent)
  return $parents | $ancestors
};

(: triggers* - processes downstream.
   $d  the OPM document (or any node containing the OPM graph)
   $p  the process(es) to start from
   Returns every process that directly or indirectly consumed an artifact
   generated by $p, without duplicates and in document order. :)
declare function local:triggers($d, $p as element(opm:process)*)
  as element(opm:process)*
{
  (: artifacts generated by the start process(es) :)
  let $produced :=
    $d//opm:artifact[@id = $d//opm:wasGeneratedBy[opm:cause/@id = $p/@id]/opm:effect/@id]
  (: "used" edges whose cause is one of those artifacts :)
  let $consumers := $d//opm:used[opm:cause/@id = $produced/@id]
  (: direct successors: the processes those edges name as their effect :)
  let $children := $d//opm:process[@id = $consumers/opm:effect/@id]
  let $descendants :=
    for $child in $children
    return local:triggers($d, $child)
  return $children | $descendants
};
declare namespace opm='http://openprovenance.org/model/v1.01.a';

let $doc := doc('workflow_opm2.xml')

(: The user must first look up the detection value in the DB; the table
   holding it (P2Detection) is what identifies the starting artifact. :)
let $detection := $doc//artifact[value/function/parameter/@val = 'P2Detection']

(: Of all artifacts upstream, keep those that hold a P2Detection.csv file. :)
return
  for $ancestor in local:derivedFrom($doc, $detection)
  where ends-with($ancestor/value/function/parameter/@val, 'P2Detection.csv')
  return $ancestor

(: result:
<?xml version="1.0" encoding="UTF-8"?>
<artifact id="a34">
  <value>
    <function id="-1" name="file" pos="0">
      <parameter alias="" id="-1" name="" pos="0" type="edu.utah.sci.vistrails.basic:File" val="/vistrails/pc3/SampleData/J062945/P2_J062945_B001_P2fits0_20081115_P2Detection.csv"/>
    </function>
  </value>
  <account id="acct2"/>
</artifact>
:)
let $doc := doc('workflow_opm2.xml')

(: Artifacts that hold a database entry for the P2Detection table. :)
let $dbArtifacts := $doc//artifact[value/function/parameter/@val = 'P2Detection']

(: Executions of the LoadCSVFileIntoDB module. :)
let $loaders := $doc//process[value/moduleExec/@moduleName = 'LoadCSVFileIntoDB']

(: A loader execution that generated one of those artifacts. :)
let $loadProcess :=
  for $artifact in $dbArtifacts,
      $edge in $doc//wasGeneratedBy,
      $loader in $loaders
  where $edge/effect/@id = $artifact/@id
    and $edge/cause/@id = $loader/@id
  return $loader

(: Everything downstream of the load. :)
let $downstream := local:triggers($doc, $loadProcess)

(: Was any descendant a ComputeColumns execution?
   Executions that failed (non-empty error attribute) are excluded. :)
let $found :=
  $downstream[value/moduleExec/@moduleName = "ComputeColumns"
              and value/moduleExec/@error = ""]

return if (empty($found)) then "no" else "yes"

(: result:
<?xml version="1.0" encoding="UTF-8"?>yes
:)
let $doc := doc('workflow_opm2.xml')

(: Artifacts that hold an image database entry with the specific value
   J062945_LoadDB. :)
let $imageArtifacts :=
  $doc//artifact[value/function/parameter/@val = 'J062945_LoadDB'
                 and value/function/parameter/@val = 'P2ImageMeta']

(: The process that generated such an artifact. :)
let $loadProcess :=
  for $artifact in $imageArtifacts,
      $edge in $doc//wasGeneratedBy,
      $candidate in $doc//process
  where $edge/effect/@id = $artifact/@id
    and $edge/cause/@id = $candidate/@id
  return $candidate

(: All processes upstream of the load. :)
return local:triggeredBy($doc, $loadProcess)

(: Result: (no way of removing results that does not affect the value)
<?xml version="1.0" encoding="UTF-8"?>
<process id="p0">
  <value> <moduleExec cached="0" completed="1" error="" id="1" machine_id="1" moduleId="12" moduleName="String" tsEnd="2009-05-14 16:51:46" tsStart="2009-05-14 16:51:46"/> </value>
  <account id="acct0"/> <account id="acct1"/> <account id="acct2"/>
</process>
<process id="p1">
  <value> <moduleExec cached="0" completed="1" error="" id="2" machine_id="1" moduleId="13" moduleName="ConcatenateString" tsEnd="2009-05-14 16:51:46" tsStart="2009-05-14 16:51:46"/> </value>
  <account id="acct0"/> <account id="acct1"/> <account id="acct2"/>
</process>
<process id="p2">
  <value> <moduleExec cached="0" completed="1" error="" id="3" machine_id="1" moduleId="2" moduleName="GetCSVFiles" tsEnd="2009-05-14 16:51:46" tsStart="2009-05-14 16:51:46"> <annotation id="1" key="used_files" value="['/vistrails/pc3/SampleData/J062945/csv_ready.csv']"/> </moduleExec> </value>
  <account id="acct0"/> <account id="acct1"/> <account id="acct2"/>
</process>
<process id="p3">
  <value> <moduleExec cached="0" completed="1" error="" id="4" machine_id="1" moduleId="4" moduleName="CreateEmptyLoadDB" tsEnd="2009-05-14 16:51:46" tsStart="2009-05-14 16:51:46"/> </value>
  <account id="acct0"/> <account id="acct1"/> <account id="acct2"/>
</process>
<process id="p11">
  <value> <moduleExec cached="" completed="1" error="" id="-1" machine_id="" moduleId="-1" moduleName="Split" tsEnd="" tsStart=""/> </value>
  <account id="acct1"/> <account id="acct2"/>
</process>
<process id="p21">
  <value> <moduleExec cached="0" completed="1" error="" id="16" machine_id="1" moduleId="5" moduleName="InputPort" tsEnd="2009-05-14 16:51:46" tsStart="2009-05-14 16:51:46"/> </value>
  <account id="acct2"/>
</process>
<process id="p22">
  <value> <moduleExec cached="0" completed="1" error="" id="17" machine_id="1" moduleId="4" moduleName="InputPort" tsEnd="2009-05-14 16:51:46" tsStart="2009-05-14 16:51:46"/> </value>
  <account id="acct2"/>
</process>
<process id="p23">
  <value> <moduleExec cached="0" completed="1" error="" id="18" machine_id="1" moduleId="3" moduleName="ReadCSVFile" tsEnd="2009-05-14 16:51:46" tsStart="2009-05-14 16:51:46"> <annotation id="6" key="used_files" value="['/vistrails/pc3/SampleData/J062945/P2_J062945_B001_P2fits0_20081115_P2ImageMeta.csv.hdr']"/> </moduleExec> </value>
  <account id="acct2"/>
</process>
:)
let $doc := doc('sdsc.xml')

(: The detection value we are tracing. :)
let $detectionValue := '261887437030025141'

(: The artifact whose content mentions that value. :)
let $start := $doc//opm:artifact[contains(., $detectionValue)]

(: Upstream artifacts that some process consumed in the "FileEntry" role,
   i.e. the artifact describing the CSV file. :)
let $fileEntryIds := $doc//opm:used[opm:role/@value = "FileEntry"]/opm:cause/@id
let $result := local:derivedFrom($doc, $start)[@id = $fileEntryIds]

return $doc//opm:artifact[@id = $result/@id]

(: result:
<artifact xmlns="http://openprovenance.org/model/v1.01.a" id="_a54">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string"> {Checksum = "f8f9d70711cb3a1cb8b359d99d98fa63", ColumnNames = {"objID", "detectID", "ippObjID", "ippDetectID", "filterID", "imageID", "obsTime", "xPos", "yPos", "xPosErr", "yPosErr", "instFlux", "instFluxErr", "psfWidMajor", "psfWidMinor", "psfTheta", "psfLikelihood", "psfCf", "infoFlag", "htmID", "zoneID", "assocDate", "modNum", "ra", "dec", "raErr", "decErr", "cx", "cy", "cz", "peakFlux", "calMag", "calMagErr", "calFlux", "calFluxErr", "calColor", "calColorErr", "sky", "skyErr", "sgSep", "dataRelease"}, FilePath = "pc3/workflows/data/J062941/P2_J062941_B001_P2fits0_20081115_P2Detection.csv", HeaderPath = "pc3/workflows/data/J062941/P2_J062941_B001_P2fits0_20081115_P2Detection.csv.hdr", RowCount = 20, TargetTable = "P2Detection"} </value>
</artifact>
:)
let $doc := doc('sdsc.xml')

(: Artifacts that describe a database entry for the P2Detection table. :)
let $dbArtifacts := $doc//opm:artifact[contains(., 'TargetTable = "P2Detection"')]

(: Processes of type LoadCSVFileIntoTable. :)
let $loaders := $doc//opm:process[contains(., 'LoadCSVFileIntoTable')]

(: The load process that uses such an artifact. :)
let $loadProcess :=
  for $artifact in $dbArtifacts,
      $edge in $doc//opm:used,
      $loader in $loaders
  where $edge/opm:cause/@id = $artifact/@id
    and $edge/opm:effect/@id = $loader/@id
  return $loader

(: Everything downstream of the load. :)
let $downstream := local:triggers($doc, $loadProcess)

(: Descendant processes that are IsMatchTableColumnRanges. :)
let $found := $downstream[contains(., "IsMatchTableColumnRanges")]

return if (empty($found)) then "no" else "yes"

(: result:
<?xml version="1.0" encoding="UTF-8"?>yes
:)
let $doc := doc('sdsc.xml')

(: Artifacts that describe a database entry for the P2ImageMeta table. :)
let $dbArtifacts := $doc//opm:artifact[contains(., 'TargetTable = "P2ImageMeta"')]

(: Processes of type LoadCSVFileIntoTable. :)
let $loaders := $doc//opm:process[contains(., 'LoadCSVFileIntoTable')]

(: The load process that uses such an artifact. :)
let $loadProcess :=
  for $artifact in $dbArtifacts,
      $edge in $doc//opm:used,
      $loader in $loaders
  where $edge/opm:cause/@id = $artifact/@id
    and $edge/opm:effect/@id = $loader/@id
  return $loader

(: All processes upstream of the load. :)
return local:triggeredBy($doc, $loadProcess)

(: result: (not consistent with sdsc results!)
   (no way of removing results that does not affect the value)
<?xml version="1.0" encoding="UTF-8"?>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p0">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.IsCSVReadyFileExists fire 0</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p1">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.StopOnFalse fire 0</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p2">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ReadCSVReadyFile fire 0</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p3">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.IsMatchCSVFileTables fire 0</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p4">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.StopOnFalse2 fire 0</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p5">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.CreateEmptyLoadDB fire 0</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p6">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.Array Permute fire 0</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p8">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ForEach.in fire 0</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p27">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ForEach.CompositeActor.in fire 3</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p28">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ForEach.CompositeActor.Record Disassembler fire 1</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p29">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ForEach.CompositeActor.IsExistsCSVFile fire 1</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p30">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ForEach.CompositeActor.StopOnFalse fire 1</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p31">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ForEach.CompositeActor.ReadCSVFileColumnNames fire 1</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p32">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ForEach.CompositeActor.IsMatchCSVFileColumnNames fire 1</value>
</process>
<process xmlns="http://openprovenance.org/model/v1.01.a" id="_p33">
  <value xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xsi:type="xs:string">.load.ForEach.CompositeActor.StopOnFalse2 fire 1</value>
</process>
:)
-- JulianaFreire - 02 Apr 2009
to top