unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
* bug#23898: 24.3; python-indent-offset was set to 0 but was and should be 4
@ 2016-07-05 14:40 Sebi
  2016-07-05 16:09 ` Glenn Morris
  0 siblings, 1 reply; 4+ messages in thread
From: Sebi @ 2016-07-05 14:40 UTC (permalink / raw)
  To: 23898

[-- Attachment #1: Type: text/plain, Size: 374 bytes --]

--=-=-=
Content-Type: text/plain




In GNU Emacs 24.3.1 (x86_64-pc-linux-gnu, GTK+ Version 3.10.7)
of 2014-03-07 on lamiak, modified by Debian
Windowing system distributor `The X.Org Foundation


I had this issue for the file in the attachment (so I get an arithmetic 
error when pressing tab). I saw on stackoverflow that this should best 
be reported to emacs, so I did.

[-- Attachment #2: testresults.py --]
[-- Type: text/x-python, Size: 66 bytes --]



from dataset import cm



def test_supv_approx():
    return 5


[-- Attachment #3: testresults.py~ --]
[-- Type: text/plain, Size: 3902 bytes --]



from nltk.corpus import brown
from nltk.corpus import treebank as penn
from nltk.corpus import multext_east as mte
from nltk.corpus import alpino


import utils


### CORPORA ###


def get_brown_corp():
    return brown.tagged_sents()

def get_penn_corp(wsj=None):
    sents = penn.tagged_sents()
    if wsj:
        if wsj > 0 and wsj < 200:
            filename = 'wsj_' + str(wsj).rjust(4, '0') + '.mrg'
            sents = penn.tagged_sents(filename)
    return list(map(lambda s: list(filter(lambda tup: tup[1] != '-NONE-', s)), sents))

def get_mte_corp(lang):
    if lang is "bulgarian": # DOESN'T WORK !!!
        return mte.tagged_sents("oana-bg.xml")
    elif lang is "czech":
        return mte.tagged_sents("oana-cs.xml")
    elif lang is "english":
        return mte.tagged_sents("oana-en.xml")
    elif lang is "estonian":
        return mte.tagged_sents("oana-et.xml")
    elif lang is "Farci":
        return mte.tagged_sents("oana-fa.xml")
    elif lang is "hungarian":
        return mte.tagged_sents("oana-hu.xml")
    elif lang is "macedonian":
        return mte.tagged_sents("oana-mk.xml")
    elif lang is "polish":
        return mte.tagged_sents("oana-pl.xml")
    elif lang is "romanian":
        return mte.tagged_sents("oana-ro.xml")
    elif lang is "slovak":
        return mte.tagged_sents("oana-sk.xml")
    elif lang is "slovene":
        return mte.tagged_sents("oana-sl.xml")
    elif lang is "serbian":
        return mte.tagged_sents("oana-sr.xml")
    return mte.tagged_sents("oana-en.xml")

def get_alpino_corp():
    return alpino.tagged_sents(tagset='universal')  


class CorpusManager():

    def __init__(self, k=10):
        self._nextid = 0
        self.k = k
        self.corpids = set()
        self.corpnams = {}
        self.corplangs = {}
        self.corpparts = {}

    def addCorpus(self, corpname, lang, taggedsents):
        self._nextid += 1
        cid = self._nextid
        self.corpids.add(cid)
        self.corpnams[cid] = corpname
        self.corplangs[cid] = lang
        self.corpparts[cid] = self._mk_partitions(taggedsents)
        return cid

    def _mk_partitions(self, taggedsents):
        parts = [[] for i in range(self.k)]
        remaining = list(taggedsents)
        while len(remaining) >= self.k:
            for i in range(self.k):
                nextsent = utils.popFront(remaining)
                utils.pushBack(parts[i], nextsent)
        return parts

    def allCorpora(self):
        for cid in self.corpids:
            yield (cid, self.corpnams[cid], self.corplangs[cid])
    
    def getTest(self, cid):
        """Returns the test parition for the 
        corpus.
        """
        return self.corpparts[cid][0]

    def _get_tvparts(self, cid):
        return self.corpparts[cid][1:]

    def getTrainVal(self, cid):
        """Returns the training set used for
        evaluation, which includes the validation
        partition.
        """
        return utils.flatten(self._get_tvparts(cid))

    def allTrainValidParts(self, cid):
        """Returns k-fold cross-validation 
        partions.
        """
        parts = self._get_tvparts(cid)
        for p in range(self.k-1):
            pval = parts[p]
            ptrain = utils.flatten(parts[:p] + parts[p+1:])
            yield (ptrain, pval)



def createCM():
    cm = CorpusManager()
    cm.addCorpus("Penn", "English", get_penn_corp())
    cm.addCorpus("Alpino", "Dutch", get_alpino_corp())
    cm.addCorpus("MTE", "Estonian", get_mte_corp('estonian'))
    cm.addCorpus("MTE", "Romanian", get_mte_corp('romanian'))
    cm.addCorpus("MTE", "Serbian", get_mte_corp('serbian'))
    cm.addCorpus("MTE", "Slovene", get_mte_corp('slovene'))
    cm.addCorpus("MTE", "English", get_mte_corp('english'))
    cm.addCorpus("MTE", "Czech", get_mte_corp('czech'))
    cm.addCorpus("Brown", "English", get_brown_corp())
    return cm

cm = createCM()

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-07-05 16:22 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-07-05 14:40 bug#23898: 24.3; python-indent-offset was set to 0 but was and should be 4 Sebi
2016-07-05 16:09 ` Glenn Morris
2016-07-05 16:20   ` Sebi
2016-07-05 16:22     ` Sebi

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).