<?xml version="1.0" encoding="UTF-8"?>
<item xmlns="http://omeka.org/schemas/omeka-xml/v5" itemId="1815" public="1" featured="0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://omeka.org/schemas/omeka-xml/v5 http://omeka.org/schemas/omeka-xml/v5/omeka-xml-5-0.xsd" uri="https://mdl-data.library.utoronto.ca/items/show/1815?output=omeka-xml" accessDate="2026-06-26T16:27:58-04:00">
  <itemType itemTypeId="20">
    <name>Data Collections</name>
    <description>These fields are in addition to Dublin Core fields to describe MDL's data collections, including microdata, statistical data, and GIS data.</description>
    <elementContainer>
      <element elementId="108">
        <name>Links</name>
        <description/>
        <elementTextContainer>
          <elementText elementTextId="231399">
            <text>&lt;a href="https://utoronto.sharepoint.com/sites/UTL-MDL-LicensedDataAndSoftware/SitePages/Welcome.aspx"&gt;MDL Licensed Data and Software page&lt;/a&gt; &lt;br /&gt;Select "Full-Text Corpus Data"</text>
          </elementText>
        </elementTextContainer>
      </element>
      <element elementId="53">
        <name>Collection</name>
        <description>Which type of resource this is</description>
        <elementTextContainer>
          <elementText elementTextId="231402">
            <text>&lt;span&gt;There are 12 different corpora available. To view a description of them, please use the following link: &lt;/span&gt;&lt;a href="https://www.corpusdata.org/corpora.asp"&gt;overview of the corpora&lt;/a&gt; 
&lt;p&gt;Each corpus is available in three different formats: a database, a word/lemma/part of speech format, and a linear text format. Additionally, each corpus contains a full lexicon file and a file containing the list of sources used. &lt;/p&gt;
&lt;p&gt;&lt;a href="https://www.corpusdata.org/formats.asp"&gt;A full overview of the formats&lt;/a&gt;&lt;/p&gt;</text>
          </elementText>
        </elementTextContainer>
      </element>
      <element elementId="111">
        <name>Terms of Use</name>
        <description/>
        <elementTextContainer>
          <elementText elementTextId="231405">
            <text>&lt;span&gt;The English-Corpora.org text corpora are intended for academic study, research, teaching and administrative use at the University of Toronto. The data is restricted to University of Toronto faculty, students, researchers and staff.  It is strictly forbidden to use this dataset or derivatives for commercial use. Further distribution of this data or derivatives is prohibited. The full restrictions are available here: &lt;br /&gt;&lt;/span&gt;&lt;a href="https://www.corpusdata.org/restrictions.asp"&gt;Restrictions on use of the corpora&lt;/a&gt;</text>
          </elementText>
        </elementTextContainer>
      </element>
      <element elementId="89">
        <name>Notes</name>
        <description/>
        <elementTextContainer>
          <elementText elementTextId="231408">
            <text>&lt;ul&gt;
&lt;li&gt;&lt;a href="https://www.corpusdata.org/database.asp"&gt;Using the database/SQL format&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://www.corpusdata.org/limitations.asp"&gt;Limitations of the data for copyright compliance&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;</text>
          </elementText>
        </elementTextContainer>
      </element>
    </elementContainer>
  </itemType>
  <elementSetContainer>
    <elementSet elementSetId="1">
      <name>Dublin Core</name>
      <description>The Dublin Core metadata element set is common to all Omeka records, including items, files, and collections. For more information see, http://dublincore.org/documents/dces/.</description>
      <elementContainer>
        <element elementId="50">
          <name>Title</name>
          <description>A name given to the resource</description>
          <elementTextContainer>
            <elementText elementTextId="231393">
              <text>Full-text corpus data</text>
            </elementText>
          </elementTextContainer>
        </element>
        <element elementId="41">
          <name>Description</name>
          <description>An account of the resource</description>
          <elementTextContainer>
            <elementText elementTextId="231396">
              <text>These full-text corpora are some of the most widely used text corpora. They represent a wide range of subjects and sources including webpages, forums, magazines, newspapers, TV and movie subtitles, academic papers, and a Spanish and Portuguese corpus. Date ranges vary by corpus. For more information about the corpora, you can read the &lt;a href="https://www.corpusdata.org/intro.asp"&gt;Full-text corpus data overview&lt;/a&gt;.</text>
            </elementText>
          </elementTextContainer>
        </element>
        <element elementId="49">
          <name>Subject</name>
          <description>The topic of the resource</description>
          <elementTextContainer>
            <elementText elementTextId="231409">
              <text>Text data, Web data</text>
            </elementText>
          </elementTextContainer>
        </element>
        <element elementId="39">
          <name>Creator</name>
          <description>An entity primarily responsible for making the resource</description>
          <elementTextContainer>
            <elementText elementTextId="231412">
              <text>English-Corpora.org</text>
            </elementText>
          </elementTextContainer>
        </element>
        <element elementId="42">
          <name>Format</name>
          <description>The file format, physical medium, or dimensions of the resource</description>
          <elementTextContainer>
            <elementText elementTextId="231415">
              <text>TXT, SQL</text>
            </elementText>
          </elementTextContainer>
        </element>
        <element elementId="44">
          <name>Language</name>
          <description>A language of the resource</description>
          <elementTextContainer>
            <elementText elementTextId="231418">
              <text>English, Spanish, Portuguese</text>
            </elementText>
          </elementTextContainer>
        </element>
      </elementContainer>
    </elementSet>
  </elementSetContainer>
</item>
