i have just started learning solr. i have installed apache tomcat server and solr 3.5. I have successfully configured solr for searching data from oracle database for one entity. I am having problem when i am adding two entities in the data-config.xml and adding the fields in the solr schema.xml.I configured my data-config.xml like this…
<dataConfig>
<dataSource name="JdbcDataSource"
driver="oracle.jdbc.driver.OracleDriver"
url="jdbc:oracle:thin:@//192.168.1.3:1521/orcl"
user="SSOHANI"
password="Ssohani123"/>
<document name="doc">
<entity name="PROJECTS"
query="select PROJECTS.ID, PROJECTS.BATCH_ID, PROJECTS.OPERATION, PROJECTS.NAME,
PROJECTS.DESCRIPTION, PROJECTS.ESTIMATED_COST, PROJECTS.GRANTOR_AGENCY_ID,
PROJECTS.GRANTEE_AGENCY_ID, PROJECTS.PROJECT_STATUS_ID,
PROJECTS.PROJECT_TYPE_ID, PROJECTS.START_DATE, PROJECTS.END_DATE,
NVL(PROJECTS.TRACS_PARENT_PROJECT_ID,0) TRACS_PARENT_PROJECT_ID,
NVL(PROJECTS.STATE_PARENT_PROJECT_ID,0) STATE_PARENT_PROJECT_ID,
NVL(PROJECTS.PLAN_ID,0) PLAN_ID,
NVL(PROJECTS.PLAN_ID_TYPE,0) PLAN_ID_TYPE,
NVL(PROJECTS.TRACS_ID,0) TRACS_ID,
NVL(PROJECTS.STATE_ID,0) STATE_ID,
PROJECTS.VALID, PROJECTS.APPLIED,
NVL(PROJECTS.COMMENTS,'NULL') COMMENTS,
PROJECTS.GENERATED_PLAN_ID, PROJECTS.TRACS_PROJECT_ID,
PROJECTS.STATE_PLAN_ID from SSOHANI.PROJECTS" >
<field column="ID" name="projects_id" />
<field column="BATCH_ID" name="projects_batch_id" />
<field column="OPERATION" name="projects_operation" />
<field column="NAME" name="projects_name" />
<field column="DESCRIPTION" name="projects_description" />
<field column="ESTIMATED_COST" name="projects_estimated_cost" />
<field column="GRANTOR_AGENCY_ID" name="projects_grantor_agency_id" />
<field column="GRANTEE_AGENCY_ID" name="projects_grantee_agency_id" />
<field column="PROJECT_STATUS_ID" name="projects_project_status_id" />
<field column="PROJECT_TYPE_ID" name="projects_project_type_id" />
<field column="START_DATE" name="projects_start_date" />
<field column="END_DATE" name="projects_end_date" />
<field column="TRACS_PARENT_PROJECT_ID" name="projects_tracs_parent_project_id" />
<field column="STATE_PARENT_PROJECT_ID" name="projects_state_parent_project_id" />
<field column="PLAN_ID" name="projects_plan_id" />
<field column="PLAN_ID_TYPE" name="projects_plan_id_type" />
<field column="TRACS_ID" name="projects_tracs_id" />
<field column="STATE_ID" name="projects_state_id" />
<field column="VALID" name="projects_valid" />
<field column="APPLIED" name="projects_applied" />
<field column="COMMENTS" name="projects_comments" />
<field column="GENERATED_PLAN_ID" name="projects_generated_plan_id" />
<field column="TRACS_PROJECT_ID" name="projects_tracs_project_id" />
<field column="STATE_PLAN_ID" name="projects_state_plan_id" />
</entity>
<entity name="PLANS"
query="select PLANS.ID, PLANS.BATCH_ID, PLANS.OPERATION, PLANS.NAME, PLANS.DESCRIPTION,
PLANS.CONTACT_ID, PLANS.PLAN_TYPE_ID, PLANS.AGENCY_ID, PLANS.START_DATE,
NVL(PLANS.END_DATE,0) END_DATE,
NVL(PLANS.TRACS_PARENT_PLAN_ID,0) TRACS_PARENT_PLAN_ID,
NVL(PLANS.STATE_PARENT_PLAN_ID,0) STATE_PARENT_PLAN_ID,
NVL(PLANS.TRACS_ID,0) TRACS_ID,
NVL(PLANS.STATE_ID,0) STATE_ID,
PLANS.VALID, PLANS.APPLIED,
NVL(PLANS.COMMENTS,'NULL') COMMENTS from SSOHANI.PLANS" >
<field column="ID" name="plans_id" />
<field column="BATCH_ID" name="plans_batch_id" />
<field column="OPERATION" name="plans_operation" />
<field column="NAME" name="plans_name" />
<field column="DESCRIPTION" name="plans_description" />
<field column="CONTACT_ID" name="plans_contact_id" />
<field column="PLAN_TYPE_ID" name="plans_plan_type_id" />
<field column="AGENCY_ID" name="plans_agency_id" />
<field column="START_DATE" name="plans_start_date" />
<field column="END_DATE" name="plans_end_date" />
<field column="TRACS_PARENT_PLAN_ID" name="plans_tracs_parent_plan_id" />
<field column="STATE_PARENT_PLAN_ID" name="plans_state_parent_plan_id" />
<field column="TRACS_ID" name="plans_tracs_id" />
<field column="STATE_ID" name="plans_state_id" />
<field column="VALID" name="plans_valid" />
<field column="APPLIED" name="plans_applied" />
<field column="COMMENTS" name="plans_comments" />
</entity>
</document>
</dataConfig>
and I configured my schema.xml like this…
<schema>
<fields>
<field name="projects_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_batch_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_operation" type="string" indexed="true" stored="true" required="true"/>
<field name="projects_name" type="string" indexed="true" stored="true" required="true"/>
<field name="projects_description" type="string" indexed="true" stored="true" required="true"/>
<field name="projects_estimated_cost" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_grantor_agency_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_grantee_agency_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_project_status_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_project_type_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_start_date" type="date" indexed="true" stored="true" required="true"/>
<field name="projects_end_date" type="date" indexed="true" stored="true" required="true"/>
<field name="projects_tracs_parent_project_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_state_parent_project_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_plan_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_plan_id_type" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_tracs_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_state_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_valid" type="string" indexed="true" stored="true" required="true"/>
<field name="projects_applied" type="string" indexed="true" stored="true" required="true"/>
<field name="projects_comments" type="string" indexed="true" stored="true" required="true"/>
<field name="projects_generated_plan_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_tracs_project_id" type="long" indexed="true" stored="true" required="true"/>
<field name="projects_state_plan_id" type="long" indexed="true" stored="true" required="true"/>
<!-- fields for plan enity -->
<field name="plans_id" type="long" indexed="true" stored="true" required="true"/>
<field name="plans_batch_id" type="long" indexed="true" stored="true" required="true"/>
<field name="plans_operation" type="string" indexed="true" stored="true" required="true"/>
<field name="plans_name" type="string" indexed="true" stored="true" required="true"/>
<field name="plans_description" type="string" indexed="true" stored="true" required="true"/>
<field name="plans_contact_id" type="long" indexed="true" stored="true" required="true"/>
<field name="plans_plan_type_id" type="long" indexed="true" stored="true" required="true"/>
<field name="plans_agency_id" type="long" indexed="true" stored="true" required="true"/>
<field name="plans_start_date" type="date" indexed="true" stored="true" required="true"/>
<field name="plans_end_date" type="date" indexed="true" stored="true" required="true"/>
<field name="plans_tracs_parent_plan_id" type="long" indexed="true" stored="true" required="true"/>
<field name="plans_state_parent_plan_id" type="string" indexed="true" stored="true" required="true"/>
<field name="plans_tracs_id" type="long" indexed="true" stored="true" required="true"/>
<field name="plans_state_id" type="long" indexed="true" stored="true" required="true"/>
<field name="plans_valid" type="string" indexed="true" stored="true" required="true"/>
<field name="plans_applied" type="string" indexed="true" stored="true" required="true"/>
<field name="plans_comments" type="string" indexed="true" stored="true" required="true"/>
</fields>
<uniqueKey>projects_id</uniqueKey>
<uniqueKey>plans_id</uniqueKey>
<defaultSearchField>projects_id</defaultSearchField>
</schema>
and my solrconfig.xml is…
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
<lst name="defaults">
<str name="config">/opt/solr/core0/conf/data-config.xml</str>
</lst>
</requestHandler>
now when i am running a full-import command i am getting the following error..
Apr 16, 2012 4:11:46 PM org.apache.solr.handler.dataimport.SolrWriter upload
WARNING: Error creating document : SolrInputDocument[{projects_tracs_id=projects_tracs_id(1.0)={0}, projects_name=projects_name(1.0)={Minnesota Firearms Safety Training Program}, projects_description=projects_description(1.0)={To train 17,500 students and 425 new instructors at 45 recruiting workshops. Hold one statwide training academy. Award 2,650 recognition awards for length of service.}, projects_comments=projects_comments(1.0)={NULL}, projects_plan_id=projects_plan_id(1.0)={0}, projects_end_date=projects_end_date(1.0)={2002-12-31 00:00:00.0}, projects_tracs_parent_project_id=projects_tracs_parent_project_id(1.0)={0}, projects_plan_id_type=projects_plan_id_type(1.0)={0}, projects_project_status_id=projects_project_status_id(1.0)={4}, projects_state_plan_id=projects_state_plan_id(1.0)={1126}, projects_estimated_cost=projects_estimated_cost(1.0)={600000}, projects_valid=projects_valid(1.0)={N}, projects_grantor_agency_id=projects_grantor_agency_id(1.0)={1154}, projects_start_date=projects_start_date(1.0)={2001-12-31 00:00:00.0}, projects_applied=projects_applied(1.0)={N}, projects_state_id=projects_state_id(1.0)={0}, projects_batch_id=projects_batch_id(1.0)={1433468017}, projects_generated_plan_id=projects_generated_plan_id(1.0)={2050667163}, projects_id=projects_id(1.0)={2009553709}, projects_operation=projects_operation(1.0)={INSERT}, projects_state_parent_project_id=projects_state_parent_project_id(1.0)={0}, projects_grantee_agency_id=projects_grantee_agency_id(1.0)={1235}, projects_tracs_project_id=projects_tracs_project_id(1.0)={1123}, projects_project_type_id=projects_project_type_id(1.0)={3}}]
org.apache.solr.common.SolrException: [doc=2009553709] missing required field: plans_applied
at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:346)
at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:60)
at org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:115)
at org.apache.solr.handler.dataimport.SolrWriter.upload(SolrWriter.java:73)
at org.apache.solr.handler.dataimport.DataImportHandler$1.upload(DataImportHandler.java:293)
at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:636)
at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:268)
at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:187)
at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:359)
at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:427)
at org.apache.solr.handler.dataimport.DataImporter$1.run(DataImporter.java:408)
solr is not able to read any of the fields of the second entity.
Can anyone help me out of this problem..?
please tell me what mistake i am making while configuring the data-config.xml or schema.xml or both the files..
Although from stack trace it looks like one of your Plan documents is missing value in required plans_applied field, I think that the first thing you need to be aware of is that data isn’t supposed to be normalized in Solr. It’s supposed to be flattened before entering index.
So, instead of indexing those two tables as separate entities, you should create a join (directly in data-config.xml, instead of your queries) between those two tables so that each resulting (join’s) table row becomes a Solr document.
That way, when you want to get all data about a single project it’ll all be in a single document – no need for joins for such a use case.
In Solr you should embrace redundancy, not relationships and constraints.
Makes sense?