DC/DWC special indexing


  • dc:date indexed as date (dc.date_dt) and as text (dc.date)

Edit SOLR schema.xml (/usr/local/solr/islandora/conf/)

...
<!-- Field type named "date", backed by solr.TrieDateField; referenced by the *_dt dynamic field. -->
<fieldType name="date"
           class="solr.TrieDateField"
           precisionStep="6"
           positionIncrementGap="100"
           omitNorms="true"
           sortMissingLast="true"/>
...
   <!-- Any field whose name ends in "_dt" (e.g. dc.date_dt) is indexed and stored with the "date" field type. -->
   <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
...

Edit DC_to_solr.xslt (/var/lib/tomcat7/webapps/fedoragsearch/WEB-INF/classes/fgsconfigFinal/index/FgsIndex/islandora_transforms/)

<?xml version="1.0" encoding="UTF-8"?>
<!--
  DC_to_solr.xslt: turns the Dublin Core (DC / QDC) datastream of a FOXML
  object into Solr <field> elements.

  Every child of oai_dc:dc becomes a field named
      {prefix}{local-name}{suffix}        (e.g. dc.title)
  holding the element's text. dc:date elements additionally produce
      {prefix}date{suffixt}               (e.g. dc.date_dt)
  holding the value returned by the get_ISO8601_date named template.

  get_ISO8601_date is not defined in this stylesheet; it has to be provided
  by the transform that includes this file.
-->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:foxml="info:fedora/fedora-system:def/foxml#"
  xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
  xmlns:dc="http://purl.org/dc/elements/1.1/">

  <!--
    Entry point: matches the last version of the DC or QDC datastream and
    hands the oai_dc:dc element found in $content to the template below.

    Parameters:
      content  node-set containing the datastream content (supplied by the caller)
      prefix   text prepended to every field name (default "dc.")
      suffix   text appended to every text field name (default empty)
  -->
  <xsl:template match="foxml:datastream[@ID='DC' or @ID='QDC']/foxml:datastreamVersion[last()]">
    <xsl:param name="content"/>
    <xsl:param name="prefix">dc.</xsl:param>
    <xsl:param name="suffix"></xsl:param>
    <xsl:apply-templates select="$content/oai_dc:dc">
      <xsl:with-param name="prefix" select="$prefix"/>
      <xsl:with-param name="suffix" select="$suffix"/>
    </xsl:apply-templates>
  </xsl:template>

  <!--
    Emits one Solr field per child element of oai_dc:dc, plus a date-typed
    field for every dc:date whose value could be converted.

    Parameters:
      prefix   text prepended to every field name (default "dc.")
      suffix   text appended to text field names (default empty)
      suffixt  text appended to the date-typed field name (default "_dt")
  -->
  <xsl:template match="oai_dc:dc">
    <xsl:param name="prefix">dc.</xsl:param>
    <xsl:param name="suffix"></xsl:param>
    <xsl:param name="suffixt">_dt</xsl:param>
    <xsl:for-each select="./*">
      <!-- Date-typed copy, written before the text field to keep the original output order. -->
      <xsl:if test="local-name() = 'date'">
        <xsl:variable name="rawTextValue" select="normalize-space(text())"/>
        <xsl:variable name="textValue">
          <xsl:call-template name="get_ISO8601_date">
            <xsl:with-param name="date" select="$rawTextValue"/>
            <xsl:with-param name="pid" select="'not provided'"/>
            <xsl:with-param name="datastream" select="'not provided'"/>
          </xsl:call-template>
        </xsl:variable>
        <!--
          Skip the date field when the conversion produced nothing: an empty
          value is not a valid date for a date-typed Solr field.
          NOTE(review): if a record carries several dc:date elements, several
          dc.date_dt values are emitted; confirm the *_dt field in schema.xml
          accepts multiple values.
        -->
        <xsl:if test="not(normalize-space($textValue) = '')">
          <field>
            <xsl:attribute name="name">
              <xsl:value-of select="concat($prefix, local-name(), $suffixt)"/>
            </xsl:attribute>
            <xsl:value-of select="$textValue"/>
          </field>
        </xsl:if>
      </xsl:if>
      <!-- Plain text field, produced for every element (dates included). -->
      <field>
        <xsl:attribute name="name">
          <xsl:value-of select="concat($prefix, local-name(), $suffix)"/>
        </xsl:attribute>
        <xsl:value-of select="text()"/>
      </field>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>

  • DC creator and subject / DWC scientificName, locality and higherGeography indexed as string (exact value), with a tokenized text copy in the matching *_dct field

nano -w /usr/local/solr/islandora/conf/schema.xml

...
   <!-- Each metadata element is declared twice: the plain name as "string", the *_dct name as "text". -->
   <!-- Dublin Core -->
   <field name="dc.subject" type="string" indexed="true" stored="true" multiValued="true"/>
   <field name="dc.subject_dct" type="text" indexed="true" stored="true" multiValued="true"/>
   <field name="dc.creator" type="string" indexed="true" stored="true" multiValued="true"/>
   <field name="dc.creator_dct" type="text" indexed="true" stored="true" multiValued="true"/>
   <!-- Darwin Core -->
   <field name="dwc.locality" type="string" indexed="true" stored="true" multiValued="true"/>
   <field name="dwc.locality_dct" type="text" indexed="true" stored="true" multiValued="true"/>
   <field name="dwc.higherGeography" type="string" indexed="true" stored="true" multiValued="true"/>
   <field name="dwc.higherGeography_dct" type="text" indexed="true" stored="true" multiValued="true"/>
   <field name="dwc.scientificName" type="string" indexed="true" stored="true" multiValued="true"/>
   <field name="dwc.scientificName_dct" type="text" indexed="true" stored="true" multiValued="true"/>
...
<!-- Copy each string field into its *_dct counterpart. -->
<!-- Dublin Core -->
<copyField source="dc.subject" dest="dc.subject_dct"/>
<copyField source="dc.creator" dest="dc.creator_dct"/>
<!-- Darwin Core -->
<copyField source="dwc.locality" dest="dwc.locality_dct"/>
<copyField source="dwc.higherGeography" dest="dwc.higherGeography_dct"/>
<copyField source="dwc.scientificName" dest="dwc.scientificName_dct"/>
...

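Stopwords and word delimiter
In most cases the books are in Italian, so the stopword files referenced below (stopwordsDC.txt and stopwords.txt) should list Italian stopwords.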

nano -w /usr/local/solr/islandora/conf/schema.xml
...
    <!-- "text_fgs": one analyzer shared by indexing and querying; stopwords are read from stopwordsDC.txt. -->
    <fieldType name="text_fgs"
               class="solr.TextField"
               positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory"
                words="stopwordsDC.txt"
                ignoreCase="true"/>
      </analyzer>
    </fieldType>
...
    <!--
      "text": separate index-time and query-time analyzers. Both split on
      whitespace, apply the word delimiter filter with the character types from
      wdfftypes.txt, drop the stopwords listed in stopwords.txt, lowercase and
      remove duplicate tokens. The index analyzer additionally runs the
      hyphenated-words filter; the query analyzer additionally applies
      synonyms.txt.
    -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <!-- Applied when documents are indexed. -->
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.HyphenatedWordsFilterFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                types="wdfftypes.txt"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <!-- Applied to the search terms at query time. -->
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                types="wdfftypes.txt"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>
 
 
isla7x/index.txt · Last modified: 2017/04/05 14:22 by giancarlo

Developers: CNR IRCrES IT Office and Library
Giancarlo Birello (giancarlo.birello _@_ ircres.cnr.it) and Anna Perin (anna.perin _@_ ircres.cnr.it)
ASA@TO.CNR is licensed under: Creative Commons License
Recent changes RSS feed Creative Commons License Valid XHTML 1.0 Valid CSS Driven by DokuWiki
Drupal Garland Theme for Dokuwiki