<?php

// Builds a SolrExtractRequest for the contents of 'somefile.pdf' and sends it
// with $client->sendUpdateStream().
//
// Please reference docs/documentation.php for the rest of the parameters.

$extractParams = new SolrModifiableParams();
$extractParams
    // index the document, using the unique ID: doc1
    ->set(SolrExtractRequest::LITERALS_PREFIX . 'id', 'doc1')

    // capture what is inside paragraph tags
    ->set(SolrExtractRequest::CAPTURE_ELEMENTS, 'p')

    // index attributes of the Tika XHTML elements into separate fields
    ->set(SolrExtractRequest::CAPTURE_ATTRIBUTES, 'true')

    // map p content to solr field
    ->set(
        SolrExtractRequest::FIELD_MAPPING_PREFIX . 'p',
        'an_indexed_field_name_that_holds_paragraphs'
    )

    // capture unmapped content here
    ->set(SolrExtractRequest::DEFAULT_FIELD, '__text__')

    // restrict capturing to matching xpath expression
    ->set(
        SolrExtractRequest::XPATH_EXPRESSION,
        '/xhtml:html/xhtml:body/xhtml:div//node()'
    );

// file_get_contents() returns false on failure; stop here instead of handing
// a non-string value on as the document content.
$binContent = file_get_contents('somefile.pdf');
if ($binContent === false) {
    throw new RuntimeException('Unable to read somefile.pdf');
}

$extractRequest = SolrExtractRequest::createFromStream($binContent, 'application/pdf', $extractParams);

// NOTE(review): $client is not defined in this snippet — presumably a SolrClient
// configured elsewhere; confirm before running.
$response = $client->sendUpdateStream($extractRequest);