The Data Extraction Workflow File drives DataScraper's workflow engine: it lists, in sequence, the workflow processors required to extract data from the Web pages belonging to a specific theme. The file is an XML document whose name ends with the suffix .profile.xml. The following is an example:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example data extraction workflow profile (saved as <name>.profile.xml).
  Each <processor> entry names one workflow processor by id and by its
  implementing class. The entries are listed in sequence, so keep their
  order when editing.
-->
<geometa-session-profile>
  <!-- Name of the theme this workflow applies to; "theme-name" is a placeholder. -->
  <theme>theme-name</theme>

  <!-- Workflow processors, in sequence. In this example every id equals the
       simple name of the class it refers to. -->
  <processor id="MigrateWorksBucket">
    <class>com.geometa.spider.processor.MigrateWorksBucket</class>
  </processor>
  <processor id="FetchSpiderClue">
    <class>com.geometa.spider.processor.FetchSpiderClue</class>
  </processor>
  <processor id="LoadHtmlPage">
    <class>com.geometa.spider.processor.LoadHtmlPage</class>
  </processor>
  <processor id="FindDataSchema_Plain">
    <class>com.geometa.spider.processor.FindDataSchema_Plain</class>
  </processor>
  <processor id="ExtractWebNodeData_Simp">
    <class>com.geometa.spider.processor.ExtractWebNodeData_Simp</class>
  </processor>
  <processor id="ValidateExtraction">
    <class>com.geometa.spider.processor.ValidateExtraction</class>
  </processor>
  <processor id="SaveFile_Simp">
    <class>com.geometa.spider.processor.SaveFile_Simp</class>
  </processor>
  <processor id="ExtractSpiderClue_Simp">
    <class>com.geometa.spider.processor.ExtractSpiderClue_Simp</class>
  </processor>
  <processor id="ConfirmSpiderClue_Simp">
    <class>com.geometa.spider.processor.ConfirmSpiderClue_Simp</class>
  </processor>
  <processor id="CleanWorksBucket">
    <class>com.geometa.spider.processor.CleanWorksBucket</class>
  </processor>
</geometa-session-profile>
  