@article{Adelberg1998,
  author  = {Brad Adelberg},
  title   = {{NoDoSE}---A Tool for Semi-Automatically Extracting Structured and Semistructured Data from Text Documents},
  journal = {SIGMOD Record},
  year    = {1998},
  volume  = {27},
  number  = {2},
  pages   = {283--294}
}
@techreport{Adelberg1999,
  author      = {Brad Adelberg and Matt Denny},
  title       = {Building Robust Wrappers for Text Sources},
  institution = {Computer Science Department, Northwestern University},
  year        = {1999},
  note        = {Technical Report},
  abstract    = {Many data sources, including web sites, do not support general query interfaces. The typical solution is to build a wrapper around the source which presents a general query interface to the underlying data by translating external queries to a form the local source understands, submitting the local query, and then repackaging the results in a generic way before returning them to the caller. This approach allows heterogeneous query processors to be built on top of underlying source over which one ...}
}
@techreport{Ahuja2000,
  author      = {Abha Ahuja and Craig Labovitz and Srinivasan Venkatachary and Roger Wattenhofer},
  title       = {The Impact of Internet Policy and Topology on Delayed Routing Convergence},
  institution = {Microsoft},
  year        = {2000},
  number      = {MSR-TR-2000-74},
  month       = jul,
  abstract    = {This paper examines the roles of inter-domain topology and routing policy in the process of delayed Internet routing convergence. In recent work, we found that the Internet lacks effective inter-domain path fail-over. Unlike switches in the public telephony network which exhibit fail-over on the order of milliseconds, we showed Internet backbone routers may take tens of minutes to reach a consistent view of the network topology after a fault. In this paper, we expand on our earlier work by exploring the impact of specific Internet provider policies and topologies on the speed of routing convergence. Based on data from the experimental injection and measurement of several hundred thousand inter-domain routing faults, we show that the time for end-to-end Internet convergence depends on the length of the longest possible backup autonomous system path between a source and destination node. We also demonstrate significant variation in the convergence behaviors of Internet service providers, with the larger providers exhibiting the fastest convergence latencies. Finally, we discuss possible modifications to BGP and provider routing policies which if deployed, would improve inter-domain routing convergence.}
}
@inproceedings{Alon1996,
  author    = {Noga Alon and Yossi Matias and Mario Szegedy},
  title     = {The space complexity of approximating the frequency moments},
  booktitle = {28th ACM Symp. on Theory of Computing},
  year      = {1996},
  pages     = {20--29},
  abstract  = {The frequency moments of a sequence containing $m_i$ elements of type $i$, for $1 \le i \le n$, are the numbers $F_k = \sum_{i=1}^{n} m_i^k$. We consider the space complexity of randomized algorithms that approximate the numbers $F_k$, when the elements of the sequence are given one by one and cannot be stored. Surprisingly, it turns out that the numbers $F_0$, $F_1$ and $F_2$ can be approximated in logarithmic space, whereas the approximation of $F_k$ for $k \ge 6$ requires $n^{\Omega(1)}$ space. Applications to data bases are...}
}
@techreport{SRI-CSL-95-06,
  author      = {Debra Anderson and Teresa F. Lunt and Harold Javitz and Ann Tamaru and Alfonso Valdes},
  title       = {Detecting Unusual Program Behavior Using the Statistical Component of the Next-generation Intrusion Detection Expert System ({NIDES})},
  institution = {Computer Science Laboratory, {SRI} International},
  year        = {1995},
  number      = {SRI-CSL-95-06},
  address     = {Menlo Park, {CA}},
  month       = may,
  url         = {http://www.csl.sri.com/papers/sri-csl-95-06/}
}
@inproceedings{Carson2004,
  author    = {Carson Andorf and Adrian Silvescu and Drena Dobbs and Vasant Honavar},
  title     = {Learning Classifiers for Assigning Protein Sequences to Gene Ontology Functional Families},
  booktitle = {Fifth International Conference on Knowledge Based Computer Systems (KBCS 2004)},
  year      = {2004},
  pages     = {256--265},
  location  = {Hyderabad, India}
}
@inproceedings{Anton2005,
  author    = {Tobias Anton},
  title     = {{XPath}-Wrapper Induction by generating tree traversal patterns},
  booktitle = {Lernen, Wissensentdeckung und Adaptivit{\"a}t (LWA) 2005, GI Workshops, Saarbr{\"u}cken},
  year      = {2005},
  pages     = {126--133},
  month     = oct,
  publisher = {DFKI},
  owner     = {dkkang},
  timestamp = {2006.06.12}
}
@inproceedings{APTE94b,
  author    = {Chidanand Apt{\'e} and Fred Damerau and Sholom M. Weiss},
  title     = {Towards language independent automated learning of text categorization models},
  booktitle = {SIGIR '94: Proceedings of the 17th annual international ACM SIGIR conference on Research and development in information retrieval},
  year      = {1994},
  pages     = {23--30},
  address   = {New York, NY, USA},
  publisher = {Springer-Verlag New York, Inc.},
  isbn      = {0-387-19889-X},
  location  = {Dublin, Ireland}
}
@book{arndt2001,
  author    = {Christoph Arndt},
  title     = {Information Measures},
  publisher = {Springer-Verlag Telos},
  year      = {2001}
}
@inproceedings{DBLP:conf/kdd/AronisP97,
  author    = {John M. Aronis and Foster J. Provost},
  title     = {Increasing the Efficiency of Data Mining Algorithms with Breadth-First Marker Propagation},
  booktitle = {Proceedings of the Third International Conference on Knowledge Discovery and Data Mining (KDD-97), Newport Beach, California, USA, August 14--17, 1997},
  year      = {1997},
  editor    = {David Heckerman and Heikki Mannila and Daryl Pregibon},
  pages     = {119--122},
  publisher = {AAAI Press},
  isbn      = {1-57735-027-8}
}
@article{Ashburner2000,
  author  = {Ashburner, M. and Ball, C. A. and Blake, J. A. and Botstein, D. and Butler, H. and Cherry, J. M. and Davis, A. P. and Dolinski, K. and Dwight, S. S. and Eppig, J. T. and Harris, M. A. and Hill, D. P. and Issel-Tarver, L. and Kasarskis, A. and Lewis, S. and Matese, J. C. and Richardson, J. E. and Ringwald, M. and Rubin, G. M. and Sherlock, G.},
  title   = {Gene ontology: tool for the unification of biology. {The} {Gene} {Ontology} {Consortium}},
  journal = {Nature Genetics},
  year    = {2000},
  volume  = {25},
  number  = {1},
  pages   = {25--29}
}
@inproceedings{Ashish1997,
  author    = {Naveen Ashish and Craig Knoblock},
  title     = {Wrapper Generation for Semi-structured Internet Sources},
  booktitle = {Workshop on Management of Semistructured Data},
  year      = {1997},
  address   = {Tucson, Arizona}
}
@inproceedings{ILP03-Atramentov,
  author    = {Atramentov, A. and Leiva, H. and Honavar, V.},
  title     = {A Multi-Relational Decision Tree Learning Algorithm - Implementation and Experiments},
  booktitle = {ILP03},
  year      = {2003},
  editor    = {T.~Horv{\'a}th and A.~Yamamoto},
  volume    = {2835},
  series    = {LNAI},
  pages     = {38--56},
  publisher = {Springer-Verlag},
  isbn      = {3-540-20144-0}
}
@inproceedings{anna2003,
  author    = {Anna Atramentov and Vasant Honavar},
  title     = {Speeding Up Multi-Relational Data Mining},
  booktitle = {Workshop on Learning Statistical Models from Relational Data at 2003 International Joint Conference on Artificial Intelligence (IJCAI 2003)},
  year      = {2003}
}
@inproceedings{DBLP:conf/ilp/AtramentovLH03,
  author    = {Anna Atramentov and Hector Leiva and Vasant Honavar},
  title     = {A Multi-relational Decision Tree Learning Algorithm - Implementation and Experiments},
  booktitle = {Inductive Logic Programming (ILP) : 13th International Conference, ILP 2003, Szeged, Hungary, September 29-October 1, 2003, Proceedings},
  year      = {2003},
  volume    = {2835},
  series    = {Lecture Notes in Computer Science},
  pages     = {38--56},
  publisher = {Springer},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  isbn      = {3-540-20144-0}
}
@techreport{Axelsson2000,
  author      = {Stefan Axelsson},
  title       = {Intrusion Detection Systems: A Survey and Taxonomy},
  institution = {Chalmers Univ.},
  year        = {2000},
  number      = {99-15},
  month       = mar,
  url         = {http://citeseer.nj.nec.com/axelsson00intrusion.html}
}
@article{Bach-y-Rita2003,
  author  = {Bach-y-Rita, Paul and Kercel, Stephen W.},
  title   = {Sensory Substitution and the Human-Machine Interface},
  journal = {Trends in Cognitive Sciences},
  year    = {2003},
  volume  = {7},
  number  = {12},
  pages   = {541--546},
  month   = dec
}
@article{bairoch00swissprot,
  author       = {Amos Bairoch and Rolf Apweiler},
  title        = {The {SWISS}-{PROT} protein sequence database and its supplement {T}r{EMBL} in 2000},
  journal      = {Nucleic Acids Res.},
  year         = {2000},
  volume       = {28},
  number       = {1},
  pages        = {45--48},
  howpublished = {\url{http://www.expasy.ch/enzyme/}},
  url          = {http://citeseer.ist.psu.edu/bairoch00swissprot.html}
}
@article{Bajcsy1976,
  author  = {R. Bajcsy and M. Tavakoli},
  title   = {Computer Recognition of Roads from Satellite Pictures},
  journal = {IEEE Transactions on Systems, Man and Cybernetics},
  year    = {1976},
  volume  = {6},
  number  = {9},
  pages   = {623--637}
}
@inproceedings{Baker1998,
  author    = {L. Douglas Baker and Andrew Kachites McCallum},
  title     = {Distributional clustering of words for text classification},
  booktitle = {Proceedings of the 21st annual international ACM SIGIR conference on Research and development in information retrieval},
  year      = {1998},
  pages     = {96--103},
  publisher = {ACM Press},
  doi       = {10.1145/290941.290970},
  isbn      = {1-58113-015-5},
  location  = {Melbourne, Australia}
}
@article{BaldiBCAN00,
  author  = {Pierre Baldi and S{\o}ren Brunak and Yves Chauvin and Claus A. F. Andersen and Henrik Nielsen},
  title   = {Assessing the accuracy of prediction algorithms for classification: an overview},
  journal = {Bioinformatics},
  year    = {2000},
  volume  = {16},
  number  = {5},
  pages   = {412--424}
}
@article{Barbara2002,
  author   = {Daniel Barbara},
  title    = {Requirements for clustering data streams},
  journal  = {ACM SIGKDD Explorations Newsletter},
  year     = {2002},
  volume   = {3},
  number   = {2},
  pages    = {23--27},
  abstract = {Scientific and industrial examples of data streams abound in astronomy, telecommunication operations, banking and stock-market applications, e-commerce and other fields. A challenge imposed by continuously arriving data streams is to analyze them and to modify the models that explain them as new data arrives. In this paper, we analyze the requirements needed for clustering data streams. We review some of the latest algorithms in the literature and assess if they meet these requirements.}
}
@article{Barnard2003,
  author   = {Kobus Barnard and Pinar Duygulu and David Forsyth and Nando de Freitas and David Blei and Michael Jordan},
  title    = {Matching Words and Pictures},
  journal  = {The Journal of Machine Learning Research},
  year     = {2003},
  volume   = {3},
  pages    = {1107--1135},
  abstract = {We present a new approach for modeling multi-modal data sets, focusing on the specific case of segmented images with associated text. Learning the joint distribution of image regions and words has many applications. We consider in detail predicting words associated with whole images (auto-annotation) and corresponding to particular image regions (region naming). Auto-annotation might help organize and access large collections of images. Region naming is a model of object recognition as a process of translating image regions to words, much as one might translate from one language to another. Learning the relationships between image regions and semantic correlates (words) is an interesting example of multi-modal data mining, particularly because it is typically hard to apply data mining techniques to collections of images. We develop a number of models for the joint distribution of image regions and words, including several which explicitly learn the correspondence between regions and words. We study multi-modal and correspondence extensions to Hofmann's hierarchical clustering/aspect model, a translation model adapted from statistical machine translation (Brown et al.), and a multi-modal extension to mixture of latent Dirichlet allocation (MoM-LDA). All models are assessed using a large collection of annotated images of real scenes. We study in depth the difficult problem of measuring performance. For the annotation task, we look at prediction performance on held out data. We present three alternative measures, oriented toward different types of task. Measuring the performance of correspondence methods is harder, because one must determine whether a word has been placed on the right region of an image. We can use annotation performance as a proxy measure, but accurate measurement requires hand labeled data, and thus must occur on a smaller scale. We show results using both an annotation proxy, and manually labeled data.}
}
@article{Barsalou1983,
  author  = {L. W. Barsalou},
  title   = {Ad hoc categories},
  journal = {Memory \& Cognition},
  year    = {1983},
  volume  = {11},
  number  = {3},
  pages   = {211--227}
}
@inproceedings{Beaudouin-Lafon2000,
  author    = {Michel Beaudouin-Lafon},
  title     = {Instrumental interaction: an interaction model for designing post-{WIMP} user interfaces},
  booktitle = {the SIGCHI conference on Human factors in computing systems},
  year      = {2000},
  pages     = {446--453},
  address   = {The Hague, The Netherlands},
  abstract  = {This article introduces a new interaction model called Instrumental Interaction that extends and generalizes the principles of direct manipulation. It covers existing interaction styles, including traditional WIMP interfaces, as well as new interaction styles such as two-handed input and augmented reality. It defines a design space for new interaction techniques and a set of properties for comparing them. Instrumental Interaction describes graphical user interfaces in terms of domain objects and interaction instruments. Interaction between users and domain objects is mediated by interaction instruments, similar to the tools and instruments we use in the real world to interact with physical objects. The article presents the model, applies it to describe and compare a number of interaction techniques, and shows how it was used to create a new interface for searching and replacing text.}
}
@inproceedings{Bekkerman2001,
  author    = {Ron Bekkerman and Ran El-Yaniv and Naftali Tishby and Yoad Winter},
  title     = {On feature distributional clustering for text categorization},
  booktitle = {the 24th annual international ACM SIGIR conference on Research and development in information retrieval},
  year      = {2001},
  pages     = {146--153},
  address   = {New Orleans, Louisiana, United States},
  abstract  = {We describe a text categorization approach that is based on a combination of feature distributional clusters with a support vector machine (SVM) classifier. Our feature selection approach employs distributional clustering of words via the recently introduced information bottleneck method, which generates a more efficient word-cluster representation of documents. Combined with the classification power of an SVM, this method yields high performance text categorization that can outperform other recent methods in terms of categorization accuracy and representation efficiency. Comparing the accuracy of our method with other techniques, we observe significant dependency of the results on the data set. We discuss the potential reasons for this dependency.}
}
@book{Bergin2004,
  author    = {Joseph Bergin and Mark Stehlik and Jim Roberts and Richard Pattis},
  title     = {Karel The Robot: A Gentle Introduction to the Art of Programming},
  publisher = {Wiley},
  year      = {1994},
  edition   = {Second},
  url       = {http://csis.pace.edu/~bergin/KarelJava2ed/karelexperimental.html},
  owner     = {dkkang},
  timestamp = {2005.12.20}
}
@article{BernersLee2001,
  author  = {Berners-Lee, Tim and Hendler, James and Lassila, Ora},
  title   = {The Semantic Web},
  journal = {Scientific American},
  year    = {2001},
  month   = may,
  url     = {http://www.sciam.com/article.cfm?articleID=00048144-10D2-1C70-84A9809EC588EF21}
}
@inproceedings{Bernstein2003,
  author    = {Daniel S. Bernstein and Zhengzhu Feng and Brian Neil Levine and Shlomo Zilberstein},
  title     = {Adaptive Peer Selection},
  booktitle = {the 2nd International Workshop on Peer-to-Peer Systems (IPTPS)},
  year      = {2003},
  address   = {Berkeley, California}
}
@book{Bishop1996,
  author    = {Christopher M. Bishop},
  title     = {Neural networks for pattern recognition},
  publisher = {Oxford University Press},
  year      = {1996},
  isbn      = {0-19-853849-9}
}
@inproceedings{bishop95standard,
  author    = {Matt Bishop},
  title     = {A Standard Audit Trail Format},
  booktitle = {Proceedings of 18th {NIST}-{NCSC} National Information Systems Security Conference},
  year      = {1995},
  pages     = {136--145},
  url       = {http://citeseer.ist.psu.edu/bishop95standard.html}
}
@misc{Blake+Merz:1998,
  author      = {C. L. Blake and C. J. Merz},
  title       = {{UCI} Repository of machine learning databases},
  year        = {1998},
  institution = {University of California, Irvine, Dept. of Information and Computer Sciences},
  url         = {http://www.ics.uci.edu/~mlearn/MLRepository.html}
}
@article{Bloom1970,
  author   = {Burton Bloom},
  title    = {Space/time trade-offs in hash coding with allowable errors},
  journal  = {Communications of the ACM},
  year     = {1970},
  volume   = {13},
  number   = {7},
  pages    = {422--426},
  abstract = {In this paper trade-offs among certain computational factors in hash coding are analyzed. The paradigm problem considered is that of testing a series of messages one-by-one for membership in a given set of messages. Two new hash-coding methods are examined and compared with a particular conventional hash-coding method. The computational factors considered are the size of the hash area (space), the time required to identify a message as a nonmember of the given set (reject time), and an allowable error frequency. The new methods are intended to reduce the amount of space required to contain the hash-coded information from that associated with conventional methods. The reduction in space is accomplished by exploiting the possibility that a small fraction of errors of commission may be tolerable in some applications, in particular, applications in which a large amount of data is involved and a core resident hash area is consequently not feasible using conventional methods. In such applications, it is envisaged that overall performance could be improved by using a smaller core resident hash area in conjunction with the new methods and, when necessary, by using some secondary and perhaps time-consuming test to ``catch'' the small fraction of errors associated with the new methods. An example is discussed which illustrates possible areas of application for the new methods. Analysis of the paradigm problem demonstrates that allowing a small number of test messages to be falsely identified as members of the given set will permit a much smaller hash area to be used without increasing reject time.}
}
@inproceedings{Blum1990,
  author    = {Avrim Blum},
  title     = {Learning boolean functions in an infinite attribute space},
  booktitle = {the twenty-second annual ACM symposium on Theory of computing},
  year      = {1990},
  pages     = {64--72},
  address   = {Baltimore, Maryland, United States},
  publisher = {ACM Press, New York, NY, USA}
}
@inproceedings{Blum1994,
  author    = {Avrim Blum and Merrick Furst and Jeffrey Jackson and Michael Kearns and Yishay Mansour},
  title     = {Weakly Learning {DNF} and Characterizing Statistical Query Learning Using {Fourier} Analysis},
  booktitle = {the 26th ACM Symposium on the Theory of Computing},
  year      = {1994},
  pages     = {253--262},
  address   = {New York, NY},
  publisher = {ACM Press}
}
@article{Blumer1989,
  author  = {Anselm Blumer and Andrzej Ehrenfeucht and David Haussler and Manfred K. Warmuth},
  title   = {Learnability and the {Vapnik--Chervonenkis} dimension},
  journal = {Journal of the ACM},
  year    = {1989},
  volume  = {36},
  number  = {4},
  pages   = {929--965}
}
@inproceedings{Board1990,
  author    = {Raymond Board and Leonard Pitt},
  title     = {On the necessity of {Occam} algorithms},
  booktitle = {the Twenty Second Annual ACM Symposium on Theory of Computing},
  year      = {1990},
  pages     = {54--63},
  address   = {Baltimore, Maryland}
}
@book{Borenstein1996,
  author    = {J. Borenstein and H. R. Everett and Liqiang Feng},
  title     = {Navigating Mobile Robots: Systems and Techniques},
  publisher = {AK Peters, Ltd.},
  year      = {1996},
  isbn      = {156881058X}
}
@inproceedings{ILP99-Bostrom-Asker,
  author    = {H. Bostr{\"o}m and L. Asker},
  title     = {Combining Divide-and-Conquer and Separate-and-Conquer for Efficient and Effective Rule Induction},
  booktitle = {Proceedings of the 9th International Workshop on Inductive Logic Programming (ILP99)},
  year      = {1999},
  editor    = {S. D{\v{z}}eroski and P. Flach},
  volume    = {1634},
  series    = {Lecture Notes in Artificial Intelligence (LNAI)},
  pages     = {33--43},
  publisher = {Springer-Verlag},
  isbn      = {3-540-66109-3}
}
@inproceedings{Bowling2003,
  author    = {Michael Bowling and Michael Littman},
  title     = {Multiagent Learning: A Game Theoretic Perspective},
  booktitle = {The 2003 International Joint Conference on Artificial Intelligence},
  year      = {2003},
  note      = {Tutorial}
}
@book{Braitenberg1986,
  author    = {Valentino Braitenberg},
  title     = {Vehicles: Experiments in Synthetic Psychology},
  publisher = {The MIT Press},
  edition   = {Reprint},
  year      = {1986},
  month     = feb,
  isbn      = {0262521121}
}
@article{Brin1998,
  author  = {Sergey Brin and Lawrence Page},
  title   = {The Anatomy of a Large-Scale Hypertextual Web Search Engine},
  journal = {Computer Networks and ISDN Systems},
  year    = {1998},
  volume  = {30},
  number  = {1--7},
  pages   = {107--117}
}
@book{Brooks2002,
  author    = {Rodney Brooks},
  title     = {Flesh and Machines: How Robots Will Change Us},
  publisher = {Pantheon},
  edition   = {First},
  year      = {2002},
  month     = feb,
  isbn      = {0375420797}
}
@book{Brooks1999,
  author    = {Rodney Brooks},
  title     = {Cambrian Intelligence: The Early History of the New {AI}},
  publisher = {The MIT Press},
  year      = {1999},
  month     = jul,
  isbn      = {0262522632}
}
@techreport{brown94vision,
  author      = {Christopher M. Brown},
  title       = {Vision, Learning, and Development},
  institution = {The University of Rochester, Computer Science Department},
  year        = {1994},
  number      = {TR492},
  month       = feb
}
@article{Brown2000,
  author   = {Michael P. S. Brown and William Noble Grundy and David Lin and Nello Cristianini and Charles Sugnet and Terrence S. Furey and Ares, Jr., Manuel and David Haussler and Michael Kearns and Nick Littlestone and Manfred K. Warmuth},
  title    = {Knowledge-based Analysis of Microarray Gene Expression Data Using Support Vector Machines},
  journal  = {Proceedings of the National Academy of Sciences},
  year     = {2000},
  volume   = {97},
  number   = {1},
  pages    = {262--267},
  abstract = {We introduce a new method of functionally classifying genes using gene expression data from DNA microarray hybridization experiments. The method is based on the theory of support vector machines. SVMs are considered a supervised computer learning method because they exploit prior knowledge of gene function to identify unknown genes of similar function from expression data. SVMs avoid several problems associated with unsupervised clustering methods such as hierarchical clustering methods and self organizing maps. SVMs have many mathematical features that make them attractive for gene expression analysis, including their flexibility in choosing a similarity function, sparseness of solution when dealing with large data sets, the ability to handle large feature spaces, and the ability to identify outliers. We test several SVMs that use different similarity metrics, as well as some other supervised learning methods, and find that the SVMs best identify sets of genes with a common function using expression data. Finally, we use SVMs to predict functional roles for uncharacterized yeast ORFs based on their expression data.}
}
@inproceedings{Buja2001,
  author    = {Andreas Buja and Yung-Seop Lee},
  title     = {Data mining criteria for tree-based regression and classification},
  booktitle = {the seventh ACM SIGKDD international conference on Knowledge discovery and data mining},
  year      = {2001},
  pages     = {27--36},
  address   = {San Francisco, California},
  abstract  = {This paper is concerned with the construction of regression and classification trees that are more adapted to data mining applications than conventional trees. To this end, we propose new splitting criteria for growing trees. Conventional splitting criteria attempt to perform well on both sides of a split by attempting a compromise in the quality of fit between the left and the right side. By contrast, we adopt a data mining point of view by proposing criteria that search for interesting subsets of the data, as opposed to modeling all of the data equally well. The new criteria do not split based on a compromise between the left and the right bucket; they effectively pick the more interesting bucket and ignore the other. As expected, the result is often a simpler characterization of interesting subsets of the data. Less expected is that the new criteria often yield whole trees that provide more interpretable data descriptions. Surprisingly, it is a "flaw" that works to their advantage: The new criteria have an increased tendency to accept splits near the boundaries of the predictor ranges. This so-called "end-cut problem" leads to the repeated peeling of small layers of data and results in very unbalanced but highly expressive and interpretable trees.}
}
@article{Burbea1982a,
  author  = {Burbea, J. and Rao, C. R.},
  title   = {Entropy Differential Metric, Distance and Divergence Measures in Probability Spaces: A Unified Approach},
  journal = {Journal of Multivariate Analysis},
  year    = {1982},
  volume  = {12},
  pages   = {575--596}
}
@article{Burbea1982b,
  author  = {Burbea, J. and Rao, C. R.},
  title   = {On the Convexity of Some Divergence Measures Based on Entropy Functions},
  journal = {IEEE Transactions on Information Theory},
  year    = {1982},
  volume  = {IT-28},
  pages   = {489--495}
}
@book{Burnham02,
  author    = {Kenneth P. Burnham and David Anderson},
  title     = {Model Selection and Multi-Model Inference},
  publisher = {Springer},
  year      = {2002},
  edition   = {Second},
  month     = jul
}
@inproceedings{Byrd1999,
  author    = {Donald Byrd},
  title     = {A Scrollbar-based Visualization for Document Navigation},
  booktitle = {the Fourth ACM International Conference on Digital Libraries},
  year      = {1999},
  address   = {Berkeley, CA}
}
@article{DBLP:journals/candc/CaiLC02,
  author    = {Yu-Dong Cai and Xiao-Jun Liu and Kuo-Chen Chou},
  title     = {Artificial Neural Network Model for Predicting Protein Subcellular Location},
  journal   = {Computers {\&} Chemistry},
  year      = {2002},
  volume    = {26},
  number    = {2},
  pages     = {179--182},
  doi       = {10.1016/S0097-8485(01)00106-1},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://dx.doi.org/10.1016/S0097-8485(01)00106-1}
}
@article{Cancedda2003,
  author  = {Nicola Cancedda and Eric Gaussier and Cyril Goutte and Jean Michel Renders},
  title   = {Word sequence kernels},
  journal = {The Journal of Machine Learning Research},
  year    = {2003},
  volume  = {3},
  number  = {Special issue},
  pages   = {1059--1082}
}
@article{Capelle1998,
  author  = {C. Capelle and C. Trullemans and P. Arno and C. Veraart},
  title   = {A Real-Time Experimental Prototype for Enhancement of Vision Rehabilitation Using Auditory Substitution},
  journal = {IEEE Trans. Biomed. Eng.},
  year    = {1998},
  volume  = {45},
  pages   = {1279--1293},
  month   = oct
}
@inproceedings{CarageaRSH03,
  author    = {Doina Caragea and Jaime Reinoso and Adrian Silvescu and Vasant Honavar},
  title     = {Statistics Gathering for Learning from Distributed, Heterogeneous and Autonomous Data Sources},
  booktitle = {Proceedings of IJCAI-03 Workshop on Information Integration on the Web (IIWeb-03), August 9-10, 2003, Acapulco, Mexico},
  year      = {2003},
  pages     = {99--104},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://www.isi.edu/info-agents/workshops/ijcai03/papers/caragea1.pdf}
}
@incollection{Caragea2001,
  author    = {Doina Caragea and Adrian Silvescu and Vasant Honavar},
  title     = {Towards a Theoretical Framework for Analysis and Synthesis of Agents That Learn from Distributed Dynamic Data Sources},
  booktitle = {Emerging Neural Architectures Based on Neuroscience},
  publisher = {Springer-Verlag},
  year      = {2001},
  address   = {Berlin},
  note      = {Invited chapter}
}
@article{Cessie1992,
  author  = {le Cessie, S. and van Houwelingen, J. C.},
  title   = {Ridge Estimators in Logistic Regression},
  journal = {Applied Statistics},
  year    = {1992},
  volume  = {41},
  number  = {1},
  pages   = {191--201}
}
@techreport{Chaturvedi2005,
  author      = {Abhishek Chaturvedi and Sandeep Bhatkar and R. Sekar},
  title       = {Improving Attack Detection in Host-Based IDS by Learning Properties of System Call Arguments},
  institution = {Department of Computer Science, Stony Brook University},
  year        = {2005},
  number      = {SECLAB-05-03},
  month       = jul,
  owner       = {dkkang},
  timestamp   = {2006.01.18}
}
@inproceedings{Cho2002,
  author    = {J. Cho and H. Garcia-Molina},
  title     = {Parallel Crawlers},
  booktitle = {11th International World-Wide Web Conference},
  year      = {2002}
}
@inproceedings{Cimiano2004,
  author    = {Philipp Cimiano and Andreas Hotho and Steffen Staab},
  title     = {Comparing Conceptual, Partitional and Agglomerative Clustering for Learning Taxonomies from Text},
  booktitle = {Proceedings of the European Conference on Artificial Intelligence (ECAI'04)},
  year      = {2004},
  url       = {http://www.aifb.uni-karlsruhe.de/WBS/pci/ecai04.pdf}
}
@inproceedings{Cimiano2003,
  author    = {Philipp Cimiano and Steffen Staab and Julien Tane},
  title     = {Automatic Acquisition of Taxonomies from Text: {FCA} meets {NLP}},
  booktitle = {Proceedings of the ECML/PKDD Workshop on Adaptive Text Extraction and Mining, Cavtat--Dubrovnik, Croatia},
  year      = {2003},
  pages     = {10--17},
  url       = {http://www.aifb.uni-karlsruhe.de/WBS/pci/ontolearning.pdf}
}
@book{Clark2003,
  author    = {Andy Clark},
  title     = {Natural-Born Cyborgs: Minds, Technologies, and the Future of Human Intelligence},
  publisher = {Oxford University Press},
  year      = {2004}
}
@inproceedings{Cohen1998,
  author    = {William W. Cohen},
  title     = {A Web-based Information System that Reasons with Structured Collections of Text},
  booktitle = {the 2nd International Conference on Autonomous Agents (Agents'98)},
  year      = {1998},
  pages     = {400--407},
  address   = {New York},
  abstract  = {The degree to which information sources are pre-processed by Web-based information systems varies greatly. In search engines like Altavista, little pre-processing is done, while in "knowledge integration" systems, complex site-specific "wrappers" are used integrate different information sources into a common database representation. In this paper we describe an intermediate between these two models. In our system, information sources are converted into a highly structured collection of small...}
}
@inproceedings{cohen95fast,
  author    = {William W. Cohen},
  title     = {Fast Effective Rule Induction},
  booktitle = {Proc. of the 12th International Conference on Machine Learning},
  year      = {1995},
  editor    = {Armand Prieditis and Stuart Russell},
  pages     = {115--123},
  address   = {Tahoe City, CA},
  month     = jul,
  publisher = {Morgan Kaufmann},
  isbn      = {1-55860-377-8},
  url       = {http://citeseer.nj.nec.com/cohen95fast.html}
}
@inproceedings{Collins2001,
  author    = {Michael Collins and Sanjoy Dasgupta and Robert E. Schapire},
  title     = {A Generalization of Principal Component Analysis to the Exponential Family},
  booktitle = {NIPS},
  year      = {2001},
  abstract  = {Principal component analysis (PCA) is a commonly applied technique for dimensionality reduction. PCA implicitly minimizes a squared loss function, which may be inappropriate for data that is not real-valued, such as binary-valued data. This paper draws on ideas from the Exponential family, Generalized linear models, and Bregman distances, to give a generalization of PCA to loss functions that we argue are better suited to other data types. We describe algorithms for minimizing the loss...}
}
@article{Cortes-Vapnik,
  author    = {Corinna Cortes and Vladimir Vapnik},
  title     = {Support-Vector Networks},
  journal   = {Machine Learning},
  year      = {1995},
  volume    = {20},
  number    = {3},
  pages     = {273--297},
  issn      = {0885-6125},
  publisher = {Kluwer Academic Publishers}
}
@inproceedings{Coull2003,
  author    = {Scott Coull and Joel Branch and Boleslaw Szymanski and Eric Breimer},
  title     = {Intrusion Detection: A Bioinformatics Approach},
  booktitle = {19th Annual Computer Security Applications Conference},
  year      = {2003},
  address   = {Las Vegas, Nevada},
  abstract  = {This paper addresses the problem of detecting masquerading, a security attack in which an intruder assumes the identity of a legitimate user. Many approaches based on Hidden Markov Models and various forms of Finite State Automata were proposed to solve this problem. The novelty of our approach results from application of techniques used in bioinformatics for a pair-wise sequence alignment to compare the monitored session with the past user behavior. Our algorithm uses a semi-global alignment and a unique scoring system to measure similarity between a sequence of commands produced by a potential intruder and the user signature, which is a sequence of commands collected from a legitimate user. We tested this algorithm on the standard intrusion data collection set. As discussed in the paper, the results of the test showed that the described algorithm yields a promising combination of intrusion detection rate and false positive rate, when compared to the published intrusion detection algorithms.},
  keywords  = {Intrusion detection, sequence alignment, bioinformatics, masquerade detection, pattern matching}
}
@book{Crain1991,
  author    = {William Crain},
  title     = {Theories of Development: Concepts and Applications},
  publisher = {Prentice Hall},
  edition   = {Third},
  year      = {1991},
  month     = nov,
  isbn      = {013913476X}
}
@inbook{Crain1991Chap6,
  author    = {William Crain},
  title     = {Theories of Development: Concepts and Applications},
  chapter   = {Piaget's Cognitive-Developmental Theory},
  publisher = {Prentice Hall},
  edition   = {Third},
  year      = {1991},
  month     = nov,
  isbn      = {013913476X}
}
@inproceedings{Cristianini2001,
  author    = {Nello Cristianini and John Shawe-Taylor and Huma Lodhi},
  title     = {Latent Semantic Kernels},
  booktitle = {the Eighteenth International Conference on Machine Learning},
  year      = {2001},
  pages     = {66--73}
}
@inproceedings{Cumby2003,
  author    = {Chad Cumby and Dan Roth},
  title     = {On Kernel Methods for Relational Learning},
  booktitle = {ICML 2003},
  year      = {2003},
  pages     = {107--114},
  abstract  = {Kernel methods have gained a great deal of popularity in the machine learning community as a method to learn indirectly in high-dimensional feature spaces. Those interested in relational learning have recently begun to cast learning from structured and relational data in terms of kernel operations. We describe a general family of kernel functions built up from a description language of limited expressivity and use it to study the benefits and drawbacks of kernel learning in relational domains. Learning with kernels in this family directly models learning over an expanded feature space constructed using the same description language. This allows us to examine issues of time complexity in terms of learning with these and other relational kernels, and how these relate to generalization ability. The tradeoffs between using kernels in a very high dimensional implicit space versus a restricted feature space, is highlighted through two experiments, in bioinformatics and in natural language processing.}
}
@inproceedings{DzeSchHei96-ILP96,
  author    = {D{\v{z}}eroski, S. and Schulze-Kremer, S. and Heidtke, K. R. and Siems, K. and Wettschereck, D.},
  title     = {Applying {ILP} to Diterpene Structure Elucidation from {$^{13}$C} {NMR} Spectra},
  booktitle = {Proceedings of the 6th International Workshop on Inductive Logic Programming (ILP96)},
  year      = {1996},
  editor    = {Muggleton, S.},
  volume    = {1314},
  series    = {Lecture Notes in Artificial Intelligence (LNAI)},
  pages     = {41--54},
  publisher = {Springer-Verlag}
}
@inproceedings{Darwiche2002,
  author    = {Adnan Darwiche},
  title     = {A Logical Approach for Factoring Belief Networks},
  booktitle = {KR 2002},
  year      = {2002},
  pages     = {409--420},
  abstract  = {We have shown recently that a belief network can be represented as a polynomial and that many probabilistic queries can be recovered in constant time from the partial derivatives of such a polynomial. Although this polynomial is exponential in size, we have shown that it can be ``computed'' using an arithmetic circuit whose size is not necessarily exponential. Hence, the key computational question becomes that of generating the smallest arithmetic circuit that computes the network...}
}
@inproceedings{Darwiche2000,
  author    = {Adnan Darwiche},
  title     = {A Differential Approach to Inference in {Bayesian} Networks},
  booktitle = {Uncertainty in Artificial Intelligence},
  year      = {2000},
  abstract  = {We present a new approach for inference in Bayesian networks, which is mainly based on partial differentiation. According to this approach, one compiles a Bayesian network into a multivariate polynomial and then computes the partial derivatives of this polynomial with respect to each variable. We show that once such derivatives are made available, one can compute in constant-time answers to a large class of probabilistic queries, which are central to classical inference, parameter estimation,...}
}
@article{Debnath1991,
  author  = {Debnath, A. K. and {Lopez de Compadre}, R. L. and Debnath, G. and Shusterman, A. J. and Hansch, C.},
  title   = {Structure-Activity Relationship of Mutagenic Aromatic and Heteroaromatic Nitro Compounds. Correlation with Molecular Orbital Energies and Hydrophobicity},
  journal = {Journal of Medicinal Chemistry},
  year    = {1991},
  month   = feb,
  volume  = {34},
  number  = {2},
  pages   = {786--797}
}
@inproceedings{Dechter1997,
  author    = {Rina Dechter},
  title     = {Mini-Buckets: A General Scheme For Generating Approximations In Automated Reasoning},
  booktitle = {Fifteenth International Joint Conference on Artificial Intelligence (IJCAI97)},
  year      = {1997},
  address   = {Japan}
}
@inproceedings{Dechter1996,
  author    = {Rina Dechter},
  title     = {Bucket elimination: A unifying framework for probabilistic inference},
  booktitle = {Twelfth Conf. on Uncertainty in Artificial Intelligence},
  year      = {1996},
  pages     = {211--219},
  abstract  = {Probabilistic inference algorithms for finding the most probable explanation, the maximum aposteriori hypothesis, and the maximum expected utility and for updating belief are reformulated as an elimination--type algorithm called bucket elimination. This emphasizes the principle common to many of the algorithms appearing in that literature and clarifies their relationship to nonserial dynamic programming algorithms. We also present a general way of combining conditioning and elimination within...}
}
@article{Deerwester1990,
  author   = {Scott Deerwester and Susan T. Dumais and George W. Furnas and Thomas K. Landauer and Richard Harshman},
  title    = {Indexing by Latent Semantic Analysis},
  journal  = {Journal of the American Society for Information Science},
  year     = {1990},
  volume   = {41},
  number   = {6},
  pages    = {391--407},
  abstract = {A new method for automatic indexing and retrieval is described. The approach is to take advantage of implicit higher-order structure in the association of terms with documents (``semantic structure'') in order to improve the detection of relevant documents on the basis of terms found in queries. The particular technique used is singular-value decomposition, in which a large term by document matrix is decomposed into a set of ca 100 orthogonal factors from which the original matrix can be...}
}
@inproceedings{Demmer1998,
  author    = {Michael J. Demmer and Maurice P. Herlihy},
  title     = {The {Arrow} Distributed Directory Protocol},
  booktitle = {12th International Symposium on Distributed Computing},
  year      = {1998},
  pages     = {119--133},
  address   = {Greece},
  abstract  = {Most practical techniques for locating remote objects in a distributed system suffer from problems of scalability and locality of reference. We have devised the Arrow distributed directory protocol, a scalable and local mechanism for ensuring mutually exclusive access to mobile objects. This directory has communication complexity optimal within a factor of (1 + MST-stretch(G))/2, where MST-stretch(G) is the ``minimum spanning tree stretch'' of the underlying network.}
}
@book{Denneberg1994,
  author    = {D. Denneberg},
  title     = {Non-additive Measure and Integral},
  publisher = {Kluwer Academic Publishers},
  address   = {Dordrecht},
  year      = {1994}
}
@article{Denning1987,
  author    = {Dorothy E. Denning},
  title     = {An intrusion-detection model},
  journal   = {IEEE Transactions on Software Engineering},
  year      = {1987},
  volume    = {13},
  number    = {2},
  pages     = {222--232},
  issn      = {0098-5589},
  publisher = {IEEE Press}
}
@inproceedings{desJardins2000,
  author    = {Marie desJardins and Lise Getoor and Daphne Koller},
  title     = {Using Feature Hierarchies in {Bayesian} Network Learning},
  booktitle = {SARA '00: Proceedings of the 4th International Symposium on Abstraction, Reformulation, and Approximation},
  year      = {2000},
  pages     = {260--270},
  publisher = {Springer-Verlag},
  isbn      = {3-540-67839-5}
}
@article{Dhar1993,
  author    = {V. Dhar and A. Tuzhilin},
  title     = {Abstract-Driven Pattern Discovery in Databases},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  year      = {1993},
  volume    = {5},
  number    = {6},
  pages     = {926--938},
  doi       = {10.1109/69.250075},
  issn      = {1041-4347},
  publisher = {IEEE Educational Activities Department}
}
@inproceedings{Dhillon2001,
  author    = {Inderjit S. Dhillon},
  title     = {Co-clustering documents and words using bipartite spectral graph partitioning},
  booktitle = {Knowledge Discovery and Data Mining},
  year      = {2001},
  pages     = {269--274}
}
@inproceedings{Dickerson2001,
  author    = {John E. Dickerson and Jukka Juslin and Ourania Koukousoula and Julie A. Dickerson},
  title     = {Fuzzy intrusion detection},
  booktitle = {IFSA World Congress and 20th North American Fuzzy Information Processing Society (NAFIPS) International Conference},
  year      = {2001},
  pages     = {1506--1510},
  address   = {Vancouver, British Columbia}
}
@inproceedings{Dissanayake2000,
  author    = {M. W. M. G. Dissanayake and P. Newman and Hugh F. Durrant-Whyte and Steve Clark and M. Csorba},
  title     = {An Experimental and Theoretical Investigation into Simultaneous Localisation and Map Building},
  booktitle = {The Sixth International Symposium on Experimental Robotics VI},
  year      = {2000},
  pages     = {265--274},
  address   = {London, UK},
  publisher = {Springer-Verlag},
  isbn      = {1-85233-210-7}
}
@inproceedings{Doan2002,
  author    = {AnHai Doan and Jayant Madhavan and Pedro Domingos and Alon Halevy},
  title     = {Learning to Map between Ontologies on the {Semantic Web}},
  booktitle = {the eleventh international conference on World Wide Web},
  year      = {2002},
  address   = {Honolulu, Hawaii, USA},
  abstract  = {Ontologies play a prominent role on the Semantic Web. They make possible the widespread publication of machine understandable data, opening myriad opportunities for automated information processing. However, because of the Semantic Web's distributed nature, data on it will inevitably come from many different ontologies. Information processing across ontologies is not possible without knowing the semantic mappings between their elements. Manually finding such mappings is tedious, error-prone, and clearly not possible at the Web scale. Hence, the development of tools to assist in the ontology mapping process is crucial to the success of the Semantic Web. We describe glue, a system that employs machine learning techniques to find such mappings. Given two ontologies, for each concept in one ontology glue finds the most similar concept in the other ontology. We give well-founded probabilistic definitions to several practical similarity measures, and show that glue can work with all of them. This is in contrast to most existing approaches, which deal with a single similarity measure. Another key feature of glue is that it uses multiple learning strategies, each of which exploits a different type of information either in the data instances or in the taxonomic structure of the ontologies. To further improve matching accuracy, we extend glue to incorporate commonsense knowledge and domain constraints into the matching process. For this purpose, we show that relaxation labeling, a well-known constraint optimization technique used in computer vision and other fields, can be adapted to work efficiently in our context.
Our approach is thus distinguished in that it works with a variety of well-defined similarity notions and that it efficiently incorporates multiple types of knowledge. We describe a set of experiments on several real-world domains, and show that glue proposes highly accurate semantic mappings.}
}
@inproceedings{Domingos1998,
  author    = {Pedro Domingos},
  title     = {{Occam's} two razors: the sharp and the blunt},
  booktitle = {Proceedings of the 4th International Conference on Knowledge Discovery and Data Mining},
  year      = {1998},
  pages     = {37--43},
  publisher = {AAAI Press}
}
@article{domingos97optimality,
  author  = {Pedro Domingos and Michael J. Pazzani},
  title   = {On the Optimality of the Simple {Bayesian} Classifier under Zero-One Loss},
  journal = {Machine Learning},
  year    = {1997},
  volume  = {29},
  number  = {2--3},
  pages   = {103--130}
}
@inproceedings{domingos96beyond,
  author    = {Pedro Domingos and Michael J. Pazzani},
  title     = {Beyond Independence: Conditions for the Optimality of the Simple {Bayesian} Classifier},
  booktitle = {International Conference on Machine Learning},
  year      = {1996},
  pages     = {105--112},
  url       = {http://citeseer.ist.psu.edu/domingos96beyond.html}
}
@inproceedings{Donlon1999,
  author    = {J. Donlon and K. Forbus},
  title     = {Using a geographic information system for qualitative spatial reasoning about trafficability},
  booktitle = {Proceedings of the Qualitative Reasoning Workshop},
  year      = {1999},
  address   = {Loch Awe, Scotland}
}
@inproceedings{Doorenbos1997,
  author    = {Robert B. Doorenbos and Oren Etzioni and Daniel S. Weld},
  title     = {A scalable comparison-shopping agent for the {World-Wide Web}},
  booktitle = {the first international conference on Autonomous agents},
  year      = {1997},
  pages     = {39--48},
  address   = {Marina del Rey, California}
}
@inproceedings{Druschel2002,
  author    = {Peter Druschel and Sitaram Iyer and Antony Rowstron},
  title     = {Squirrel: A decentralized peer to peer web cache},
  booktitle = {PODC 2002},
  year      = {2002}
}
@book{Duda2000,
  author    = {Richard O. Duda and Peter E. Hart and David G. Stork},
  title     = {Pattern Classification},
  edition   = {Second},
  publisher = {Wiley-Interscience},
  year      = {2000},
  isbn      = {0471056693}
}
@inproceedings{Dumais1998,
  author    = {Susan Dumais and John Platt and David Heckerman and Mehran Sahami},
  title     = {Inductive learning algorithms and representations for text categorization},
  booktitle = {CIKM '98: Proceedings of the seventh international conference on Information and knowledge management},
  year      = {1998},
  pages     = {148--155},
  publisher = {ACM Press},
  doi       = {10.1145/288627.288651},
  isbn      = {1-58113-061-9},
  location  = {Bethesda, Maryland, United States}
}
@inproceedings{Dzeroski1998,
  author    = {D{\v{z}}eroski, Sa{\v{s}}o and De Raedt, Luc and Blockeel, Hendrik},
  title     = {Relational reinforcement learning},
  booktitle = {International Workshop on Inductive Logic Programming},
  year      = {1998},
  pages     = {136--143},
  address   = {Madison, WI},
  abstract  = {Relational reinforcement learning is presented, a learning technique that combines reinforcement learning with relational learning or inductive logic programming. Due to the use of a more expressive representation language to represent states, actions and Q-functions, relational reinforcement learning can be potentially applied to a new range of learning tasks. One such task that we investigate is planning in the blocks world, where it is assumed that the effects of the actions are ...}
}
@inproceedings{Easterlin1985,
  author    = {J. D. Easterlin and Pat Langley},
  title     = {A framework for concept formation},
  booktitle = {Proceedings of the Seventh Conference of the Cognitive Science Society},
  year      = {1985},
  pages     = {267--271},
  address   = {Irvine, CA, USA}
}
@techreport{Endler2004,
  author      = {David Endler},
  title       = {Intrusion Detection using {Solaris'} {Basic Security Module}},
  institution = {TechTarget, Inc.},
  year        = {2004},
  month       = jul,
  address     = {Needham, MA},
  url         = {http://www.securityfocus.com/print/infocus/1211},
  owner       = {dkkang},
  timestamp   = {2006.01.18}
}
@inproceedings{Engelson1992,
  author    = {S. Engelson and D. McDermott},
  title     = {Error correction in mobile robot map learning},
  booktitle = {Proceedings of the IEEE International Conference on Robotics \& Automation (ICRA)},
  year      = {1992}
}
@inproceedings{Eskin2000,
  author    = {Eleazar Eskin},
  title     = {Anomaly Detection over Noisy Data using Learned Probability Distributions},
  booktitle = {the 2000 International Conference on Machine Learning (ICML-2000)},
  year      = {2000},
  address   = {Palo Alto, CA},
  abstract  = {Traditional anomaly detection techniques focus on detecting anomalies in new data after training on normal (or clean) data. In this paper we present a technique for detecting anomalies without training on normal data. We present a method for detecting anomalies within a data set that contains a large number of normal elements and relatively few anomalies. We present a mixture model for explaining the presence of anomalies in the data. Motivated by the model, the approach uses machine learning techniques to estimate a probability distribution over the data and applies a statistical test to detect the anomalies. The anomaly detection technique is applied to intrusion detection by examining intrusions manifested as anomalies in UNIX system call traces.}
}
% NOTE(review): the journal value reads like a book/collection title and no volume or pages are given -- confirm the entry type.
@article{Eskin2002,
  author  = {Eleazar Eskin and Andrew Arnold and Michael Prerau and Leonid Portnoy and Salvatore Stolfo},
  title   = {A Geometric Framework for Unsupervised Anomaly Detection: Detecting Intrusions in Unlabeled Data},
  journal = {Data Mining for Security Applications},
  year    = {2002}
}
@article{Estivill-Castro2002,
  author  = {Vladimir Estivill-Castro},
  title   = {Why so many clustering algorithms: a position paper},
  journal = {SIGKDD Explorations},
  year    = {2002},
  volume  = {4},
  number  = {1},
  pages   = {65--75}
}
@inproceedings{Eyheramendy2003,
  author    = {Susana Eyheramendy and David D. Lewis and David Madigan},
  title     = {On the {Naive Bayes} Model for Text Categorization},
  booktitle = {Ninth International Workshop on Artificial Intelligence and Statistics},
  year      = {2003}
}
@techreport{Fang1997,
  author      = {Weiwu Fang},
  title       = {{FDOD} Function and the Information Discrepancy Contained in Multiple Probability Distributions},
  institution = {DIMACS Center, Rutgers University},
  year        = {1997},
  number      = {97-36},
  abstract    = {The concept of Shannon information has played a significant role in a variety of scientific and engineering areas. The question naturally arises: how can we measure information discrepancy contained in two or more probability distributions? The answer to this problem will be very interesting in both theory and practice. Some measures for the cases of two or three distributions have presented by the pioneers, but these measures have some disadvantages; moreover, there doesn't exist a measure for $n$ distributions so far. A FDOD function with many good properties has been introduced in the study of information discrepancy of judgments of multiple experts ( FW 1994). In this paper, based on the ideas concerned with Shannon information and measures of difference, we propose an axiom set for measuring the information discrepancy contained in a group of distributions, and prove that the only function satisfying the axiom set is of the FDOD form. The final results and even the intermediate results in deed show the close connection of the FDOD function with Shannon information and the measures of difference in statistics.}
}
@techreport{Fawcett2003,
  author      = {Tom Fawcett},
  title       = {{ROC} graphs: Notes and practical considerations for researchers},
  institution = {HP Labs},
  year        = {2003},
  number      = {HPL-2003-4}
}
@article{Feigenbaum2001,
  author  = {Joan Feigenbaum and Christos H. Papadimitriou and Scott Shenker},
  title   = {Sharing the Cost of Multicast Transmissions},
  journal = {Journal of Computer and System Sciences},
  year    = {2001},
  volume  = {63},
  number  = {1},
  pages   = {21--41}
}
@article{Firestone1996,
  author  = {L. Firestone and S. Rupert and J. Olson and W. Mueller},
  title   = {Automated Feature Extraction: The Key to Future Productivity},
  journal = {Photogrammetric Engineering and Remote Sensing},
  year    = {1996},
  volume  = {62},
  number  = {6},
  pages   = {671--674}
}
@inproceedings{Flach2003,
  author    = {Peter A. Flach},
  title     = {The Geometry of {ROC} Space: Understanding Machine Learning Metrics through {ROC} Isometrics},
  booktitle = {the 20th International Conference on Machine Learning (ICML 2003)},
  year      = {2003},
  pages     = {194--201},
  publisher = {AAAI Press},
  abstract  = {Many different metrics are used in machine learning and data mining to build and evaluate models. However, there is no general theory of machine learning metrics, that could answer questions such as: When we simultaneously want to optimise two criteria, how can or should they be traded off? Some metrics are inherently independent of class and misclassification cost distributions, while other are not -- can this be made more precise? This paper provides a derivation of ROC space from first principles through 3D ROC space and the skew ratio, and redefines metrics in these dimensions. The paper demonstrates that the graphical depiction of machine learning metrics by means of ROC isometrics gives many useful insights into the characteristics of these metrics, and provides a foundation on which a theory of machine learning metrics can be built.}
}
@article{Flach2004,
  author  = {Peter Flach and Nicolas Lachiche},
  title   = {Naive {Bayesian} Classification of Structured Data},
  journal = {Machine Learning},
  year    = {2004},
  volume  = {57},
  pages   = {233--269}
}
@inproceedings{Forrest1996,
  author    = {Stephanie Forrest and Steven A. Hofmeyr and Anil Somayaji and Thomas A. Longstaff},
  title     = {A Sense of Self for {Unix} Processes},
  booktitle = {Proceedings of the 1996 IEEE Symposium on Security and Privacy},
  year      = {1996},
  pages     = {120--128},
  publisher = {IEEE Computer Society},
  isbn      = {0-8186-7417-2}
}
@article{Freund1997,
  author  = {Yoav Freund and Robert E. Schapire},
  title   = {A decision-theoretic generalization of on-line learning and an application to boosting},
  journal = {Journal of Computer and System Sciences},
  year    = {1997},
  volume  = {55},
  number  = {1},
  pages   = {119--139}
}
@inproceedings{freund96experiments,
  author    = {Yoav Freund and Robert E. Schapire},
  title     = {Experiments with a New Boosting Algorithm},
  booktitle = {International Conference on Machine Learning},
  year      = {1996},
  pages     = {148--156},
  url       = {http://citeseer.ist.psu.edu/freund96experiments.html}
}
@inproceedings{Friedman1998,
  author    = {Nir Friedman},
  title     = {The {Bayesian} Structural {EM} Algorithm},
  booktitle = {Fourteenth Conf. on Uncertainty in Artificial Intelligence (UAI 98)},
  year      = {1998},
  abstract  = {In recent years there has been a flurry of works on learning Bayesian networks from data. One of the hard problems in this area is how to effectively learn the structure of a belief network from incomplete data---that is, in the presence of missing values or hidden variables. In a recent paper, I introduced an algorithm called Structural EM that combines the standard Expectation Maximization (EM) algorithm, which optimizes parameters, with structure search for model selection. That algorithm learns networks based on penalized likelihood scores, which include the BIC/MDL score and various approximations to the Bayesian score. In this paper, I extend Structural EM to deal directly with Bayesian model selection. I prove the convergence of the resulting algorithm and show how to apply it for learning a large class of probabilistic models, including Bayesian networks and some variants thereof.}
}
@article{Friedman1997,
  author    = {Nir Friedman and Dan Geiger and Moises Goldszmidt},
  title     = {{Bayesian} Network Classifiers},
  journal   = {Machine Learning},
  year      = {1997},
  volume    = {29},
  number    = {2--3},
  pages     = {131--163},
  issn      = {0885-6125},
  publisher = {Kluwer Academic Publishers}
}
@inproceedings{DBLP:conf/ijcai/FriedmanGKP99,
  author    = {Nir Friedman and Lise Getoor and Daphne Koller and Avi Pfeffer},
  title     = {Learning Probabilistic Relational Models},
  booktitle = {Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence, {IJCAI} 99, Stockholm, Sweden, July 31 -- August 6, 1999},
  year      = {1999},
  editor    = {Thomas Dean},
  pages     = {1300--1309},
  publisher = {Morgan Kaufmann},
  isbn      = {1-55860-613-0}
}
@inproceedings{Friedman1996,
  author    = {Nir Friedman and Moises Goldszmidt},
  title     = {Building Classifiers using {Bayesian} Networks},
  booktitle = {AAAI/IAAI},
  year      = {1996},
  volume    = {2},
  pages     = {1277--1284}
}
@inproceedings{Friedman2001,
  author    = {Nir Friedman and Daphne Koller},
  title     = {Learning {Bayesian} Networks From Data},
  booktitle = {NIPS 2001},
  year      = {2001},
  note      = {Tutorial}
}
@article{Friedman2002,
  author   = {Nir Friedman and Matan Ninio and Itsik Pe'er and Tal Pupko},
  title    = {A Structural {EM} Algorithm for Phylogenetic Inference},
  journal  = {Journal of Computational Biology},
  year     = {2002},
  volume   = {9},
  pages    = {331--353},
  abstract = {A central task in the study of molecular evolution is the reconstruction of a phylogenetic tree from sequences of current-day taxa. The most established approach to tree reconstruction is maximum likelihood (ML) analysis. Unfortunately, searching for the maximum likelihood phylogenetic tree is computationally prohibitive for large data sets. In this paper, we describe a new algorithm that uses Structural EM for learning maximum likelihood phylogenetic trees. This algorithm is similar to the standard EM method for edge-length estimation, except that during iterations of the Structural EM algorithm the topology is improved as well as the edge length. Our algorithm performs iterations of two steps. In the E-Step, we use the current tree topology and edge lengths to compute expected sufficient statistics, which summarize the data. In the M-Step, we search for a topology that maximizes the likelihood with respect to these expected sufficient statistics. We show that searching for better topologies inside the M-step can be done efficiently, as opposed to standard methods for topology search. We prove that each iteration of this procedure increases the likelihood of the topology, and thus the procedure must converge. This convergence point, however, can be a sub-optimal one. To escape from such ``local optima'' we further enhance our basic EM procedure by incorporating moves in the flavor of simulated annealing. We evaluate these new algorithms on both synthetic and real sequence data, and show that for protein sequences even our basic algorithm finds more plausible trees than existing methods for searching maximum likelihood phylogenies.
Furthermore, our algorithms are dramatically faster than such methods, enabling, for the first time, phylogenetic analysis of large protein data sets in the maximum likelihood framework.}
}
@article{Fua1996,
  author    = {P. Fua},
  title     = {Model-based Optimization: Accurate and Consistent Site Modeling},
  journal   = {International Archives for Photogrammetry and Remote Sensing},
  year      = {1996},
  volume    = {31},
  number    = {B3},
  pages     = {222--233},
  publisher = {Plenum Press}
}
@inproceedings{Forstner1987,
  author    = {W. F{\"o}rstner and E. G{\"u}lch},
  title     = {A Fast Operator for Detection and Precise Location of Distinct Points, Corners and Centers of Circular Features},
  booktitle = {Proceedings ISPRS Intercommission Workshop on Fast Processing of Photogrammetric Data},
  year      = {1987},
  month     = jun,
  address   = {Interlaken}
}
@incollection{Gallistel1999,
  author    = {Charles R. Gallistel},
  title     = {Coordinate transformations in the genesis of directed action},
  booktitle = {Cognitive Science},
  editor    = {Benjamin Bly and David Rumelhart},
  publisher = {Academic Press},
  address   = {New York},
  year      = {1999},
  pages     = {1--42},
  owner     = {dkkang},
  timestamp = {2005.11.23}
}
@inproceedings{gama98,
  author    = {Jo{\~a}o Gama},
  title     = {Local Cascade Generalization},
  booktitle = {ICML '98: Proceedings of the Fifteenth International Conference on Machine Learning},
  year      = {1998},
  pages     = {206--214},
  address   = {San Francisco, CA, USA},
  publisher = {Morgan Kaufmann Publishers Inc.},
  isbn      = {1-55860-556-8}
}
@article{gama00,
  author  = {Jo{\~a}o Gama and Pavel Brazdil},
  title   = {Cascade Generalization},
  journal = {Machine Learning},
  year    = {2000},
  volume  = {41},
  number  = {3},
  pages   = {315--343}
}
@inproceedings{Ganesan2003,
  author    = {Prasanna Ganesan and Qixiang Sun and Hector Garcia-Molina},
  title     = {{YAPPERS}: A Peer-to-Peer Lookup Service over Arbitrary Topology},
  booktitle = {IEEE INFOCOM},
  year      = {2003},
  abstract  = {Existing peer-to-peer search networks generally fall into two categories: Gnutella-style systems that use arbitrary topology and rely on controlled flooding for search, and systems that explicitly build an underlying topology to efficiently support a distributed hash table (DHT). In this paper, we propose a hybrid scheme for building a peer-to-peer lookup service over arbitrary network topology. Specifically, for each node in the search network, we build a small DHT consisting of nearby nodes...}
}
@inproceedings{Ganti1999,
  author    = {Venkatesh Ganti and Johannes Gehrke and Raghu Ramakrishnan},
  title     = {{CACTUS}---clustering categorical data using summaries},
  booktitle = {Proceedings of the fifth ACM SIGKDD international conference on Knowledge discovery and data mining},
  year      = {1999},
  pages     = {73--83},
  publisher = {ACM Press},
  doi       = {10.1145/312129.312201},
  isbn      = {1-58113-143-7},
  location  = {San Diego, California, United States}
}
@article{Garofalakis2003,
  author  = {Minos Garofalakis and Aristides Gionis and Rajeev Rastogi and S. Seshadri and Kyuseok Shim},
  title   = {{XTRACT}: Learning Document Type Descriptors from {XML} Document Collections},
  journal = {Data Mining and Knowledge Discovery},
  year    = {2003},
  volume  = {7},
  pages   = {23--56}
}
@inproceedings{Gerkey2003,
  author    = {Brian P. Gerkey and Richard T. Vaughan and Andrew Howard},
  title     = {The {Player/Stage} Project: Tools for Multi-Robot and Distributed Sensor Systems},
  booktitle = {Proceedings of the International Conference on Advanced Robotics (ICAR)},
  year      = {2003},
  month     = jul,
  pages     = {317--323},
  address   = {Coimbra, Portugal},
  owner     = {DK},
  timestamp = {2006.03.06}
}
@article{Getoor2002,
  author   = {Lise Getoor and Nir Friedman and Daphne Koller and Benjamin Taskar},
  title    = {Learning Probabilistic Models of Link Structure},
  journal  = {Journal of Machine Learning Research},
  year     = {2002},
  volume   = {3},
  number   = {SPECIAL ISSUE},
  pages    = {679--707},
  abstract = {Most real-world data is heterogeneous and richly interconnected. Examples include the Web, hypertext, bibliometric data and social networks. In contrast, most statistical learning methods work with ``flat'' data representations, forcing us to convert our data into a form that loses much of the link structure. The recently introduced framework of probabilistic relational models (PRMs) embraces the object-relational nature of structured data by capturing probabilistic interactions between attributes of related entities. In this paper, we extend this framework by modeling interactions between the attributes and the link structure itself. An advantage of our approach is a unified generative model for both content and relational structure. We propose two mechanisms for representing a probabilistic distribution over link structures: reference uncertainty and existence uncertainty. We describe the appropriate conditions for using each model and present learning algorithms for each. We present experimental results showing that the learned models can be used to predict link structure and, moreover, the observed link structure can be used to provide better predictions for the attributes in the model.},
  keywords = {Probabilistic Relational Models, Bayesian Networks, Relational Learning}
}
@inproceedings{Getoor2001,
  author    = {Lise Getoor and Nir Friedman and Daphne Koller and Benjamin Taskar},
  title     = {Learning Probabilistic Models of Relational Structure},
  booktitle = {ICML '01: Proceedings of the Eighteenth International Conference on Machine Learning},
  year      = {2001},
  pages     = {170--177},
  address   = {San Francisco, CA, USA},
  publisher = {Morgan Kaufmann Publishers Inc.},
  isbn      = {1-55860-778-1}
}
@inproceedings{Ghosh1999,
  author    = {Anup Ghosh and Aaron Schwartzbard},
  title     = {A study in using neural networks for anomaly and misuse detection},
  booktitle = {8th USENIX Security Symposium},
  year      = {1999},
  pages     = {141--151},
  address   = {Washington, D.C.}
}
% NOTE(review): appears to duplicate entry gibson00clustering (same title, journal, volume, issue and pages; only the year differs) -- confirm the year and consider merging.
@article{Gibson1998,
  author  = {David Gibson and Jon Kleinberg and Prabhakar Raghavan},
  title   = {Clustering Categorical Data: An Approach Based on Dynamical Systems},
  journal = {VLDB Journal: Very Large Data Bases},
  year    = {1998},
  volume  = {8},
  number  = {3--4},
  pages   = {222--236}
}
@article{gibson00clustering,
  author  = {David Gibson and Jon M. Kleinberg and Prabhakar Raghavan},
  title   = {Clustering Categorical Data: An Approach Based on Dynamical Systems},
  journal = {VLDB Journal: Very Large Data Bases},
  year    = {2000},
  volume  = {8},
  number  = {3--4},
  pages   = {222--236},
  url     = {http://citeseer.ist.psu.edu/article/gibson98clustering.html}
}
@article{Gibson1988,
  author  = {Eleanor Gibson},
  title   = {Exploratory behavior in the development of perceiving, acting, and the acquiring of knowledge},
  journal = {Annual Review of Psychology},
  year    = {1988},
  volume  = {39},
  pages   = {1--41}
}
@book{Gibson1979,
  author    = {James J. Gibson},
  title     = {The ecological approach to visual perception},
  publisher = {Lawrence Erlbaum Associates},
  year      = {1979},
  isbn      = {0-89859-959-8}
}
@incollection{Gibson1977,
  author    = {James J. Gibson},
  title     = {The Theory of Affordances},
  booktitle = {Perceiving, Acting, and Knowing},
  editor    = {R. E. Shaw and J. Bransford},
  publisher = {Lawrence Erlbaum},
  address   = {Hillsdale, NJ},
  year      = {1977}
}
@inproceedings{Giles1998,
  author    = {C. Lee Giles and Kurt D. Bollacker and Steve Lawrence},
  title     = {CiteSeer: An Automatic Citation Indexing System},
  booktitle = {Digital Libraries 98 - Third ACM Conference on Digital Libraries},
  year      = {1998},
  pages     = {89--98},
  abstract  = {We present CiteSeer: an autonomous citation indexing system which indexes academic literature in electronic format (e.g. Postscript files on the Web). CiteSeer understands how to parse citations, identify citations to the same paper in different formats, and identify the context of citations in the body of articles. CiteSeer provides most of the advantages of traditional (manually constructed) citation indexes (e.g. the ISI citation indexes), including: literature retrieval by following citation links (e.g. by providing a list of papers that cite a given paper), the evaluation and ranking of papers, authors, journals, etc. based on the number of citations, and the identification of research trends. CiteSeer has many advantages over traditional citation indexes, including the ability to create more up-to-date databases which are not limited to a preselected set of journals or restricted by journal publication delays, completely autonomous operation with a corresponding reduction in cost, and powerful interactive browsing of the literature using the context of citations. Given a particular paper of interest, CiteSeer can display the context of how the paper is cited in subsequent publications. This context may contain a brief summary of the paper, another author's response to the paper, or subsequent work which builds upon the original article. CiteSeer allows the location of papers by keyword search or by citation links. Papers related to a given paper can be located using common citation information or word vector similarity. CiteSeer will soon be available for public use.}
}
@inproceedings{Goldman2000,
  author    = {Roy Goldman and Jennifer Widom},
  title     = {WSQ/DSQ: A Practical Approach for Combined Querying of Databases and the Web},
  booktitle = {the ACM SIGMOD Int. Conf. on Management of Data},
  address   = {Dallas, US},
  year      = {2000},
  pages     = {285--296},
  abstract  = {We present WSQ/DSQ (pronounced ``wisk-disk''), a new approach for combining the query facilities of traditional databases with existing search engines on the Web. WSQ, for Web-Supported (Database) Queries, leverages results from Web searches to enhance SQL queries over a relational database. DSQ, for Database-Supported (Web) Queries, uses information stored in the database to enhance and explain Web searches. This paper focuses primarily on WSQ, describing a simple, low-overhead way to...}
}
@inproceedings{Gonzales2001,
  author    = {L. Gonz{\'a}les},
  title     = {Universal Aggregation Operators},
  booktitle = {EusFlat'2001},
  address   = {Leicester},
  year      = {2001}
}
@article{Grunwald2003,
  author   = {Peter D. Gr{\"u}nwald and Joseph Y. Halpern},
  title    = {Updating Probabilities},
  journal  = {Journal of Artificial Intelligence Research (JAIR)},
  year     = {2003},
  volume   = {19},
  pages    = {243--278},
  abstract = {As examples such as the Monty Hall puzzle show, applying conditioning to update a probability distribution on a ``naive space'', which does not take into account the protocol used, can often lead to counterintuitive results. Here we examine why. A criterion known as CAR (``coarsening at random'') in the statistical literature characterizes when ``naive'' conditioning in a naive space works. We show that the CAR condition holds rather infrequently, and we provide a procedural characterization of it, by giving a randomized algorithm that generates all and only distributions for which CAR holds. This substantially extends previous characterizations of CAR. We also consider more generalized notions of update such as Jeffrey conditioning and minimizing relative entropy (MRE). We give a generalization of the CAR condition that characterizes when Jeffrey conditioning leads to appropriate answers, and show that there exist some very simple settings in which MRE essentially never gives the right results. This generalizes and interconnects previous results obtained in the literature on CAR and MRE.}
}
@inproceedings{Guha1998,
  author    = {Sudipto Guha and Rajeev Rastogi and Kyuseok Shim},
  title     = {CURE: An Efficient Clustering Algorithm for Large Databases},
  booktitle = {ACM SIGMOD International Conference on Management of Data},
  pages     = {73--84},
  year      = {1998},
  abstract  = {Clustering, in data mining, is useful for discovering groups and identifying interesting distributions in the underlying data. Traditional clustering algorithms either favor clusters with spherical shapes and similar sizes, or are very fragile in the presence of outliers. We propose a new clustering algorithm called CURE that is more robust to outliers, and identifies clusters having non-spherical shapes and wide variances in size. CURE achieves this by representing each cluster by a certain...}
}
@inproceedings{Kayacik2003,
  author    = {Gunes Kayacik and Nur Zincir-Heywood and Malcolm Heywood},
  title     = {On the Capability of an {SOM} based Intrusion Detection System},
  booktitle = {The IEEE International Joint Conference on Neural Networks, IJCNN03},
  year      = {2003}
}
@book{Gusfield1997,
  author    = {Dan Gusfield},
  title     = {Algorithms on Strings, Trees, and Sequences: Computer Science and Computational Biology},
  publisher = {Cambridge University Press},
  year      = {1997},
  edition   = {First},
  owner     = {dkkang},
  timestamp = {2006.05.26}
}
@inproceedings{Hammer1997,
  author    = {Joachim Hammer and Hector Garcia-Molina and Junghoo Cho and Arturo Crespo and Rohan Aranha},
  title     = {Extracting Semistructured Information from the Web},
  booktitle = {the Workshop on Management of Semistructured Data},
  year      = {1997}
}
@inproceedings{Hammer1997sigmod,
  author    = {Joachim Hammer and Hector Garcia-Molina and Svetlozar Nestorov and Ramana Yerneni and Marcus Breunig and Vasilis Vassalos},
  title     = {Template-based wrappers in the TSIMMIS system},
  booktitle = {Twenty-Third ACM SIGMOD International Conference on Management of Data},
  address   = {Tucson, Arizona},
  year      = {1997}
}
@incollection{han96exploration,
  author    = {Jiawei Han and Yongjian Fu},
  title     = {Exploration of the Power of Attribute-Oriented Induction in Data Mining},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  editor    = {Usama M. Fayyad and Gregory Piatetsky-Shapiro and Padhraic Smyth and Ramasamy Uthurusamy},
  publisher = {AAAI Press/MIT Press},
  year      = {1996},
  isbn      = {0-262-56097-6},
  url       = {http://citeseer.ist.psu.edu/han96exploration.html}
}
@article{harnad90theSymbol,
  author  = {S. Harnad},
  title   = {The Symbol Grounding Problem},
  journal = {Physica D: Nonlinear Phenomena},
  volume  = {42},
  pages   = {335--346},
  year    = {1990},
  url     = {http://www.isrl.uiuc.edu/~amag/langev/paper/harnad90theSymbol.html}
}
@article{Hart1968,
  author  = {P. E. Hart and N. J. Nilsson and B. Raphael},
  title   = {A Formal Basis for the Heuristic Determination of Minimum Cost Paths},
  journal = {IEEE Transactions on Systems Science and Cybernetics (SSC)},
  year    = {1968},
  volume  = {4},
  number  = {2},
  pages   = {100--107}
}
@inproceedings{Harvey2003,
  author    = {Nicholas J. A. Harvey and Michael B. Jones and Stefan Saroiu and Marvin Theimer and Alec Wolman},
  title     = {{SkipNet}: A scalable overlay network with practical locality properties},
  booktitle = {the Fourth USENIX Symposium on Internet Technologies and Systems (USITS '03)},
  address   = {Seattle, WA},
  year      = {2003},
  abstract  = {Scalable overlay networks such as Chord, CAN, Pastry, and Tapestry have recently emerged as flexible infrastructure for building large peer-to-peer systems. In practice, such systems have two disadvantages: They provide no control over where data is stored and no guarantee that routing paths remain within an administrative domain whenever possible. SkipNet is a scalable overlay network that provides controlled data placement and guaranteed routing locality by organizing data primarily by string names. SkipNet allows for both fine-grained and coarse-grained control over data placement: Content can be placed either on a pre-determined node or distributed uniformly across the nodes of a hierarchical naming subtree. An additional useful consequence of SkipNet's locality properties is that partition failures, in which an entire organization disconnects from the rest of the system, can result in two disjoint, but well-connected overlay networks.},
  keywords  = {Peer-to-Peer, Scalable, Locality, Self-Configuring, Range Query, Distributed System}
}
@article{haussler1988,
  author  = {D. Haussler},
  title   = {Quantifying inductive bias: {AI} learning algorithms and {Valiant}'s learning framework},
  journal = {Artificial Intelligence},
  year    = {1988},
  volume  = {36},
  pages   = {177--221}
}
@article{Haussler1992,
  author  = {David Haussler},
  title   = {Decision Theoretic Generalizations of the PAC Model for Neural Net and Other Learning Applications},
  journal = {Information and Computation},
  year    = {1992},
  volume  = {100},
  pages   = {78--150}
}
@inproceedings{Haussler1991,
  author    = {David Haussler and Michael Kearns and Robert Schapire},
  title     = {Bounds on the Sample Complexity of Bayesian Learning Using Information Theory and the VC Dimension},
  booktitle = {the fourth annual workshop on Computational learning theory},
  address   = {Santa Cruz, California, United States},
  year      = {1991},
  pages     = {61--74},
  abstract  = {In this paper we study a Bayesian or average-case model of concept learning with a twofold goal: to provide more precise characterizations of learning curve (sample complexity) behavior that depend on properties of both the prior distribution over concepts and the sequence of instances seen by the learner, and to smoothly unite in a common framework the popular statistical physics and VC dimension theories of learning curves. To achieve this, we undertake a systematic investigation and...}
}
@inproceedings{Haveliwala2002,
  author    = {Taher H. Haveliwala},
  title     = {Topic-Sensitive PageRank},
  booktitle = {the Eleventh International World Wide Web Conference},
  year      = {2002}
}
@techreport{Haveliwala1999,
  author      = {Taher H. Haveliwala},
  title       = {Efficient Computation of {PageRank}},
  institution = {Stanford University},
  year        = {1999},
  number      = {1999-31},
  abstract    = {This paper discusses efficient techniques for computing PageRank, a ranking metric for hypertext documents. We show that PageRank can be computed for very large subgraphs of the web (up to hundreds of millions of nodes) on machines with limited main memory. Running-time measurements on various memory configurations are presented for PageRank computation over the 24-million-page Stanford WebBase archive. We discuss several methods for analyzing the convergence of PageRank based on the induced ordering of the pages. We present convergence results helpful for determining the number of iterations necessary to achieve a useful PageRank assignment, both in the absence and presence of search queries.}
}
@book{Hawkins2004,
  author    = {Jeff Hawkins and Sandra Blakeslee},
  title     = {On Intelligence},
  publisher = {Times Books},
  year      = {2004},
  isbn      = {0-8050-7456-2}
}
@inproceedings{Hearst1995,
  author    = {Marti A. Hearst},
  title     = {TileBars: Visualization of Term Distribution Information in Full Text Information Access},
  booktitle = {Proceedings of the Conference on Human Factors in Computing Systems, {CHI}'95},
  address   = {Denver, CO},
  year      = {1995},
  url       = {http://citeseer.ist.psu.edu/hearst95tilebars.html}
}
@article{Hearst2002,
  author   = {Marti Hearst and Ame Elliott and Jennifer English and Rashmi Sinha and Kirsten Swearingen and Ka-Ping Yee},
  title    = {Finding the flow in web site search},
  journal  = {Communications of the ACM},
  year     = {2002},
  volume   = {45},
  number   = {9},
  pages    = {42--49},
  abstract = {Designing a search system and interface may best be served (and executed) by scrutinizing usability studies.}
}
@inproceedings{Heller2003,
  author    = {Katherine A. Heller and Krysta M. Svore and Angelos D. Keromytis and Salvatore J. Stolfo},
  title     = {One Class Support Vector Machines for Detecting Anomalous {Windows} Registry Accesses},
  booktitle = {The 3rd IEEE Conference Data Mining Workshop on Data Mining for Computer Security},
  address   = {Florida},
  year      = {2003}
}
@article{Helmer2003,
  author  = {Guy Helmer and Johnny Wong and Vasant Honavar and Les Miller},
  title   = {Lightweight Agents for Intrusion Detection},
  journal = {Journal of Systems and Software},
  year    = {2003},
  volume  = {67},
  pages   = {109--122}
}
@inproceedings{Helmer1999,
  author    = {Guy Helmer and Johnny Wong and Vasant Honavar and Les Miller},
  title     = {Data-Driven Induction of Compact Predictive Rules for Intrusion Detection from System Log Data},
  booktitle = {the Conference on Genetic and Evolutionary Computation (GECCO 99)},
  address   = {Orlando, Florida},
  year      = {1999}
}
@inproceedings{Helmer2001,
  author    = {Guy Helmer and Johnny Wong and Mark Slagell and Vasant Honavar and Les Miller and Robyn Lutz},
  title     = {A Software Fault Tree Approach to Requirement Analysis of an Intrusion Detection System},
  booktitle = {Symposium on Requirements Engineering for Information Security},
  year      = {2001}
}
@inproceedings{Helmer1998,
  author    = {Guy Helmer and Johnny S. K. Wong and Vasant Honavar and Les Miller},
  title     = {Intelligent Agents for Intrusion Detection},
  booktitle = {IEEE Information Technology Conference},
  address   = {Syracuse, NY},
  year      = {1998},
  pages     = {121--124},
  abstract  = {This paper focuses on intrusion detection and countermeasures with respect to widely-used operating systems and networks. The design and architecture of an intrusion detection system built from distributed agents is proposed to implement an intelligent system on which data mining can be performed to provide global, temporal views of an entire networked system. A starting point for agent intelligence in our system is the research into the use of machine learning over system call traces from the...}
}
@article{Helmer2002,
  author    = {Guy Helmer and Johnny S. K. Wong and Vasant G. Honavar and Les Miller},
  title     = {Automated discovery of concise predictive rules for intrusion detection},
  journal   = {J. Syst. Softw.},
  year      = {2002},
  volume    = {60},
  number    = {3},
  pages     = {165--175},
  publisher = {Elsevier Science Inc.},
  issn      = {0164-1212},
  doi       = {10.1016/S0164-1212(01)00088-7}
}
@techreport{Hendler1996,
  author      = {Hendler, J. and Stoffel, K. and Taylor, M.},
  title       = {Advances in High Performance Knowledge Representation},
  institution = {University of Maryland Institute for Advanced Computer Studies Dept. of Computer Science},
  number      = {CS-TR-3672},
  year        = {1996}
}
@article{Hipp2000,
  author   = {Jochen Hipp and Ulrich G{\"u}ntzer and Gholamreza Nakhaeizadeh},
  title    = {Algorithms for Association Rule Mining --- A General Survey and Comparison},
  journal  = {SIGKDD Explorations},
  year     = {2000},
  abstract = {Today there are several efficient algorithms that cope with the popular and computationally expensive task of association rule mining. Actually, these algorithms are more or less described on their own. In this paper we explain the fundamentals of association rule mining and moreover derive a general framework. Based on this we describe today's approaches in context by pointing out common aspects and differences. After that we thoroughly investigate their strengths and weaknesses and carry out...}
}
@article{Hofmann2001,
  author   = {Thomas Hofmann},
  title    = {Unsupervised Learning by Probabilistic Latent Semantic Analysis},
  journal  = {Machine Learning},
  year     = {2001},
  volume   = {42},
  pages    = {177--196},
  abstract = {This paper presents a novel statistical method for factor analysis of binary and count data which is closely related to a technique known as Latent Semantic Analysis. In contrast to the latter method which stems from linear algebra and performs a Singular Value Decomposition of co-occurrence tables, the proposed technique uses a generative latent class model to perform a probabilistic mixture decomposition. This results in a more principled approach with a solid foundation in statistical inference. More precisely, we propose to make use of a temperature controlled version of the Expectation Maximization algorithm for model fitting, which has shown excellent performance in practice. Probabilistic Latent Semantic Analysis has many applications, most prominently in information retrieval, natural language processing, machine learning from text, and in related areas. The paper presents perplexity results for different types of text and linguistic data collections and discusses an application in automated document indexing. The experiments indicate substantial and consistent improvements of the probabilistic method over standard Latent Semantic Analysis.}
}
@inproceedings{Hofmann1999,
  author    = {Thomas Hofmann},
  title     = {The Cluster-Abstraction Model: Unsupervised Learning of Topic Hierarchies from Text Data},
  booktitle = {IJCAI 99},
  year      = {1999},
  abstract  = {This paper presents a novel statistical latent class model for text mining and interactive information access. The described learning architecture, called Cluster--Abstraction Model (CAM), is purely data driven and utilizes context-specific word occurrence statistics. In an intertwined fashion, the CAM extracts hierarchical relations between groups of documents as well as an abstractive organization of keywords. An annealed version of the Expectation--Maximization (EM) algorithm for maximum...}
}
@inproceedings{Hofmann1999sigir,
  author    = {Thomas Hofmann},
  title     = {Probabilistic latent semantic indexing},
  booktitle = {the 22nd annual international ACM SIGIR conference on Research and development in information retrieval},
  publisher = {ACM Press, New York, NY, USA},
  address   = {Berkeley, California, United States},
  year      = {1999},
  pages     = {50--57}
}
@inproceedings{Hofmann1999uai,
  author    = {Thomas Hofmann},
  title     = {Probabilistic Latent Semantic Analysis},
  booktitle = {Uncertainty in Artificial Intelligence},
  address   = {Stockholm},
  year      = {1999},
  abstract  = {Probabilistic Latent Semantic Analysis is a novel statistical technique for the analysis of two--mode and co-occurrence data, which has applications in information retrieval and filtering, natural language processing, machine learning from text, and in related areas. Compared to standard Latent Semantic Analysis which stems from linear algebra and performs a Singular Value Decomposition of co-occurrence tables, the proposed method is based on a mixture decomposition derived from a latent class...}
}
@article{hofmeyr98intrusion,
  author  = {Steven A. Hofmeyr and Stephanie Forrest and Anil Somayaji},
  title   = {Intrusion Detection Using Sequences of System Calls},
  journal = {Journal of Computer Security},
  year    = {1998},
  volume  = {6},
  number  = {3},
  pages   = {151--180},
  url     = {http://citeseer.ist.psu.edu/hofmeyr98intrusion.html}
}
@inproceedings{Hotho2003,
  author    = {Andreas Hotho and Steffen Staab and Gerd Stumme},
  title     = {WordNet improves text document clustering},
  booktitle = {Proc. of the SIGIR 2003 Semantic Web Workshop},
  year      = {2003}
}
@article{Huang1994,
  author   = {Cecil Huang and Adnan Darwiche},
  title    = {Inference in Belief Networks: A Procedural Guide},
  journal  = {International Journal of Approximate Reasoning},
  year     = {1994},
  volume   = {15},
  number   = {3},
  pages    = {225--263},
  abstract = {Belief networks are popular tools for encoding uncertainty in expert systems. These networks rely on inference algorithms to compute beliefs in the context of observed evidence. One established method for exact inference on belief networks is the Probability Propagation in Trees of Clusters (PPTC) algorithm, as developed by Lauritzen and Spiegelhalter and refined by Jensen et al. PPTC converts the belief network into a secondary structure, then computes probabilities by manipulating the...}
}
@article{Huang2003,
  author   = {Xiaoqiu Huang and Kun-Mao Chao},
  title    = {A generalized global alignment algorithm},
  journal  = {Bioinformatics},
  year     = {2003},
  volume   = {19},
  number   = {2},
  pages    = {228--233},
  abstract = {Motivation: Homologous sequences are sometimes similar over some regions but different over other regions. Homologous sequences have a much lower global similarity if the different regions are much longer than the similar regions. Results: We present a generalized global alignment algorithm for comparing sequences with intermittent similarities, an ordered list of similar regions separated by different regions. A generalized global alignment model is defined to handle sequences with intermittent similarities. A dynamic programming algorithm is designed to compute an optimal general alignment in time proportional to the product of sequence lengths and in space proportional to the sum of sequence lengths. The algorithm is implemented as a computer program named GAP3 (Global Alignment Program Version 3). The generalized global alignment model is validated by experimental results produced with GAP3 on both DNA and protein sequences. The GAP3 program extends the ability of standard global alignment programs to recognize homologous sequences of lower similarity. The GAP3 program is freely available for academic use at http://bioinformatics.iastate.edu/aat/align/align.html.}
}
@inproceedings{Indyk1999,
  author    = {Piotr Indyk},
  title     = {Sublinear Time Algorithms for Metric Space Problems},
  booktitle = {STOC 99},
  pages     = {428--434},
  year      = {1999}
}
@inproceedings{Jaakkola1999,
  author    = {Tommi Jaakkola and Marina Meila and Tony Jebara},
  title     = {Maximum entropy discrimination},
  booktitle = {NIPS 1999},
  year      = {1999},
  pages     = {470--476},
  abstract  = {We present a general framework for discriminative estimation based on the maximum entropy principle and its extensions. All calculations involve...}
}
% Journal article ("Proceedings of the Royal Society" is a periodical), so it is
% typed as an article with the volume in its own field instead of in the venue string.
@article{Jeffreys1946,
  author  = {Jeffreys, H.},
  title   = {An invariant form for the prior probability in estimation procedures},
  journal = {Proceedings of the Royal Society of London, Series A},
  year    = {1946},
  volume  = {186},
  pages   = {453--461},
  address = {London, UK}
}
@inproceedings{Jensen2002,
  author    = {David Jensen and Jennifer Neville},
  title     = {Linkage and Autocorrelation Cause Feature Selection Bias in Relational Learning},
  booktitle = {ICML '02: Proceedings of the Nineteenth International Conference on Machine Learning},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  year      = {2002},
  pages     = {259--266},
  isbn      = {1-55860-873-7}
}
@article{Jin2003,
  author  = {Lixia Jin and Weiwu Fang and Huanwen Tang},
  title   = {Prediction of protein structural classes by a new measure of information discrepancy},
  journal = {Computational Biology and Chemistry},
  year    = {2003},
  volume  = {27},
  number  = {3},
  pages   = {373--380}
}
@inproceedings{joachims98text,
  author    = {Thorsten Joachims},
  title     = {Text categorization with support vector machines: learning with many relevant features},
  booktitle = {Proceedings of {ECML}-98, 10th European Conference on Machine Learning},
  editor    = {Claire N{\'e}dellec and C{\'e}line Rouveirol},
  publisher = {Springer Verlag, Heidelberg, DE},
  address   = {Chemnitz, DE},
  year      = {1998},
  pages     = {137--142},
  url       = {http://citeseer.ist.psu.edu/joachims97text.html}
}
@inproceedings{John95,
  author    = {George John and Pat Langley},
  title     = {Estimating Continuous Distributions in Bayesian Classifiers},
  booktitle = {Proceedings of the 11th Annual Conference on Uncertainty in Artificial Intelligence (UAI-95)},
  publisher = {Morgan Kaufmann Publishers},
  address   = {San Francisco, CA},
  year      = {1995},
  pages     = {338--345}
}
@inproceedings{Jones2001,
  author    = {A. Jones and S. Li},
  title     = {Temporal Signatures for Intrusion Detection},
  booktitle = {ACSAC '01: Proceedings of the 17th Annual Computer Security Applications Conference},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  year      = {2001},
  pages     = {252},
  isbn      = {0-7695-1405-7}
}
@inproceedings{Kamvar2003,
  author    = {Sepandar Kamvar and Mario Schlosser and Hector Garcia-Molina},
  title     = {EigenRep: Reputation Management in P2P Networks},
  booktitle = {the 12th International World Wide Web Conference},
  address   = {Budapest, Hungary},
  year      = {2003}
}
@inproceedings{Kandola2002,
  author    = {Jaz Kandola and John Shawe-Taylor and Nello Cristianini},
  title     = {Learning semantic similarity},
  booktitle = {NIPS 2002},
  volume    = {15},
  year      = {2002}
}
@inproceedings{Kang2003ismis,
  author    = {Dae-Ki Kang and Joongmin Choi},
  title     = {{MetaNews}: An Information Agent for Gathering News Articles on the Web},
  booktitle = {Foundations of Intelligent Systems, 14th International Symposium, {ISMIS} 2003, Maebashi City, Japan, October 28-31, 2003, Proceedings},
  editor    = {Ning Zhong and Zbigniew W. Ras and Shusaku Tsumoto and Einoshin Suzuki},
  series    = {Lecture Notes in Computer Science},
  volume    = {2871},
  publisher = {Springer},
  year      = {2003},
  pages     = {179--186}
}
@inproceedings{KangICCS1997,
  author    = {Dae-Ki Kang and Yun-Koo Chung and Woong-Rok Doh},
  title     = {One-to-many template matching for automated visual inspection},
  booktitle = {Poster session of the First International Conference on Cognitive Science},
  address   = {Seoul, Korea},
  year      = {1997},
  month     = aug,
  owner     = {dkkang},
  timestamp = {2006.07.10}
}
@article{KangIJMTM1999,
  author    = {Dae-Ki Kang and Yun-Koo Chung and Woong-Rok Doh and Won Jung and Sang-Bong Park},
  title     = {Applying object modelling technique to automated visual inspection of automotive compressor parts omission},
  journal   = {International Journal of Machine Tools and Manufacture},
  year      = {1999},
  month     = aug,
  volume    = {39},
  number    = {11},
  pages     = {1779--1792},
  owner     = {dkkang},
  timestamp = {2006.07.10}
}
@inproceedings{KangICSC1997,
  author    = {Dae-Ki Kang and Yun-Koo Chung and Won Jung and Woong-Rok Doh and Sang-Bong Park},
  title     = {Automated visual inspection of automotive evaporator core using one-to-many template matching},
  booktitle = {Proceedings of the Second International ICSC Symposium on Intelligent Industrial Automation},
  address   = {Nimes, France},
  year      = {1997},
  month     = sep,
  owner     = {dkkang},
  timestamp = {2006.07.10}
}
@inproceedings{dkkang2005isi,
  author    = {Dae-Ki Kang and Doug Fuller and Vasant Honavar},
  title     = {Learning Classifiers for Misuse Detection Using a Bag of System Calls Representation},
  booktitle = {Proceedings of {IEEE} International Conference on Intelligence and Security Informatics ({ISI}-2005)},
  series    = {Lecture Notes in Computer Science},
  volume    = {3495},
  publisher = {Springer-Verlag},
  address   = {Atlanta, GA, USA},
  year      = {2005},
  month     = may,
  pages     = {511--516}
}
@inproceedings{Kang2005iaw,
  author    = {Dae-Ki Kang and Doug Fuller and Vasant Honavar},
  title     = {Learning Classifiers for Misuse and Anomaly Detection Using a Bag of System Calls Representation},
  booktitle = {Proceedings of 6th IEEE Systems Man and Cybernetics Information Assurance Workshop (IAW)},
  address   = {West Point, NY, USA},
  year      = {2005}
}
@inproceedings{KangWebnet1997,
  author    = {Dae-Ki Kang and Joong-Bae Kim and Ho-Sang Ham},
  title     = {HANMAUM - a multi-agent model for customer, merchant, and directory service},
  booktitle = {Proceedings of the Second World Conference of the WWW, Internet, Intranet},
  address   = {Toronto, Canada},
  year      = {1997},
  month     = oct,
  owner     = {dkkang},
  timestamp = {2006.07.10}
}
@inproceedings{KangINET1997,
  author    = {Dae-Ki Kang and Joong-Bae Kim and Joo-Chan Sohn and Ho-Sang Ham},
  title     = {A world wide web directory service architecture for electronic commerce},
  booktitle = {Proceedings of the Seventh Annual Conference of Internet Society},
  address   = {Kuala Lumpur, Malaysia},
  year      = {1997},
  month     = jun,
  owner     = {dkkang},
  timestamp = {2006.07.10}
}
@inproceedings{dkkang2006Recursive,
  author    = {Dae-Ki Kang and Adrian Silvescu and Vasant Honavar},
  title     = {{RNBL-MN}: A Recursive Naive Bayes Learner for Sequence Classification},
  booktitle = {10th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD 2006)},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {3918},
  publisher = {Springer Verlag},
  address   = {Singapore},
  year      = {2006},
  month     = apr
}
@inproceedings{dkkang2004kdo,
  author    = {Dae-Ki Kang and Adrian Silvescu and Jun Zhang and Vasant Honavar},
  title     = {Generation of Attribute Value Taxonomies from Data and Their Use in Data-Driven Construction of Accurate and Compact Naive Bayes Classifiers},
  booktitle = {Proceedings of {ECML/PKDD}-2004 Knowledge Discovery and Ontologies Workshop ({KDO}-2004)},
  address   = {Pisa, Italy},
  year      = {2004},
  month     = sep
}
@inproceedings{Kang2004icdm,
  author    = {Dae-Ki Kang and Adrian Silvescu and Jun Zhang and Vasant Honavar},
  title     = {Generation of Attribute Value Taxonomies from Data for Data-Driven Construction of Accurate and Compact Classifiers},
  booktitle = {Proceedings of the 4th IEEE International Conference on Data Mining (ICDM 2004), 1-4 November 2004, Brighton, UK},
  year      = {2004},
  pages     = {130--137},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://csdl.computer.org/comp/proceedings/icdm/2004/2142/00/21420130abs.htm}
}
@inproceedings{Kang2005sara,
  author    = {Dae-Ki Kang and Jun Zhang and Adrian Silvescu and Vasant Honavar},
  title     = {Multinomial Event Model Based Abstraction for Sequence and Text Classification},
  booktitle = {Abstraction, Reformulation and Approximation, 6th International Symposium, SARA 2005, Edinburgh, Scotland, UK, July 26-29, 2005, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  pages     = {134--148},
  year      = {2005}
}
@inproceedings{Karger1997,
  author    = {David Karger and Eric Lehman and Tom Leighton and Matthew Levine and Daniel Lewin and Rina Panigrahy},
  title     = {Consistent Hashing and Random Trees: Distributed Caching Protocols for Relieving Hot Spots on the World Wide Web},
  booktitle = {ACM Symposium on Theory of Computing},
  year      = {1997},
  pages     = {654--663},
  abstract  = {We describe a family of caching protocols for distributed networks that can be used to decrease or eliminate the occurrence of hot spots in the network. Our protocols are particularly designed for use with very large networks such as the Internet, where delays caused by hot spots can be severe, and where it is not feasible for every server to have complete information about the current state of the entire network. The protocols are easy to implement using existing network protocols such as...}
}
@inproceedings{Karger1999,
  author    = {David Karger and Alex Sherman and Andy Berkheimer and Bill Bogstad and Rizwan Dhanidina and Ken Iwamoto and Brian Kim and Luke Matkins and Yoav Yerushalmi},
  title     = {Web Caching with Consistent Hashing},
  booktitle = {the eighth international conference on World Wide Web},
  address   = {Toronto, Canada},
  year      = {1999},
  pages     = {1203--1213},
  abstract  = {A key performance measure for the World Wide Web is the speed with which content is served to users. As traffic on the Web increases, users are faced with increasing delays and failures in data delivery. Web caching is one of the key strategies that has been explored to improve performance. An important issue in many caching systems is how to decide what is cached where at any given time. Solutions have included multicast queries and directory schemes. In this paper, we offer a new web caching strategy based on consistent hashing. Consistent hashing provides an alternative to multicast and directory schemes, and has several other advantages in load balancing and fault tolerance. Its performance was analyzed theoretically in previous work; in this paper we describe the implementation of a consistent-hashing based system and experiments that support our thesis that it can provide performance improvements.}
}
@INPROCEEDINGS{Kearns1993,
  AUTHOR    = {Michael Kearns},
  TITLE     = {Efficient Noise-Tolerant Learning From Statistical Queries},
  BOOKTITLE = {the Twenty-Fifth Annual ACM Symposium on Theory of Computing},
  YEAR      = {1993},
  PAGES     = {392--401}
}
@ARTICLE{Kearns1997,
  AUTHOR    = {Michael Kearns and Yishay Mansour and Andrew Y. Ng and Dana Ron},
  TITLE     = {An Experimental and Theoretical Comparison of Model Selection Methods},
  JOURNAL   = {Machine Learning},
  YEAR      = {1997},
  VOLUME    = {27},
  PAGES     = {7--50},
  ABSTRACT  = {We investigate the problem of model selection in the setting of supervised learning of boolean functions from independent random examples. More precisely, we compare methods for finding a balance between the complexity of the hypothesis chosen and its observed error on a random training sample of limited size, when the goal is that of minimizing the resulting generalization error. We undertake a detailed comparison of three well-known model selection methods --- a variation of Vapnik's Guaranteed Risk Minimization (GRM), an instance of Rissanen's Minimum Description Length Principle (MDL), and (hold-out) cross validation (CV). We introduce a general class of model selection methods (called penalty-based methods) that includes both GRM and MDL, and provide general methods for analyzing such rules. We provide both controlled experimental evidence and formal theorems to support the following conclusions:}
}
@INCOLLECTION{Kercel2005,
  AUTHOR    = {S. W. Kercel and P. Bach-y-Rita},
  TITLE     = {Non-Invasive Coupling of Electronically Generated Data Into the Human Nervous System},
  BOOKTITLE = {Wiley Encyclopedia of Biomedical Engineering},
  PUBLISHER = {Wiley},
  YEAR      = {2005},
  EDITOR    = {Metin Akay},
  NOTE      = {In Press}
}
@ARTICLE{King1995,
  AUTHOR    = {R. D. King and A. Srinivasan and M. J. E. Sternberg},
  TITLE     = {Relating chemical activity to structure: an examination of ILP successes},
  JOURNAL   = {New Gen. Comput.},
  YEAR      = {1995},
  VOLUME    = {13},
  NUMBER    = {3--4},
  PAGES     = {411--433}
}
@ARTICLE{Kleinberg1999,
  AUTHOR    = {Jon M. Kleinberg},
  TITLE     = {Authoritative sources in a hyperlinked environment},
  JOURNAL   = {Journal of the ACM},
  YEAR      = {1999},
  VOLUME    = {46},
  NUMBER    = {5},
  PAGES     = {604--632}
}
@INPROCEEDINGS{ecmlKlimtY04,
  AUTHOR    = {Bryan Klimt and Yiming Yang},
  TITLE     = {The {Enron} Corpus: A New Dataset for Email Classification Research},
  BOOKTITLE = {15th European Conference on Machine Learning (ECML2004)},
  SERIES    = {Lecture Notes in Computer Science},
  VOLUME    = {3201},
  PUBLISHER = {Springer-Verlag},
  YEAR      = {2004},
  MONTH     = sep,
  PAGES     = {217--226},
  BIBSOURCE = {DBLP, http://dblp.uni-trier.de},
  EE        = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3201{\&}spage=217}
}
@INPROCEEDINGS{DBLP:conf/pkdd/KnobbeSM02,
  AUTHOR    = {Arno J. Knobbe and Arno Siebes and Bart Marseille},
  TITLE     = {Involving Aggregate Functions in Multi-relational Search},
  BOOKTITLE = {Principles of Data Mining and Knowledge Discovery, 6th European Conference, PKDD 2002, Helsinki, Finland, August 19-23, 2002, Proceedings},
  YEAR      = {2002},
  EDITOR    = {Tapio Elomaa and Heikki Mannila and Hannu Toivonen},
  SERIES    = {Lecture Notes in Computer Science},
  VOLUME    = {2431},
  PAGES     = {287--298},
  PUBLISHER = {Springer},
  EE        = {http://link.springer.de/link/service/series/0558/bibs/2431/24310287.htm},
  ISBN      = {3-540-44037-2}
}
@ARTICLE{Knoblock2000,
  AUTHOR    = {Craig A. Knoblock and Kristina Lerman and Steven Minton and Ion Muslea},
  TITLE     = {Accurately and Reliably Extracting Data from the Web: A Machine Learning Approach},
  JOURNAL   = {IEEE Data Engineering Bulletin},
  YEAR      = {2000},
  VOLUME    = {23},
  NUMBER    = {4},
  PAGES     = {33--41},
  ABSTRACT  = {A critical problem in developing information agents for the Web is accessing data that is formatted for human use. We have developed a set of tools for extracting data from web sites and transforming it into a structured data format, such as XML. The resulting data can then be used to build new applications without having to deal with unstructured data. The advantages of our wrapping technology over previous work are the ability to learn highly accurate extraction rules, to verify the...}
}
@INPROCEEDINGS{Knoblock1998,
  AUTHOR    = {Craig A. Knoblock and Steven Minton and Jose Luis Ambite and Naveen Ashish and Pragnesh Jay Modi and Ion Muslea and Andrew G. Philpot and Sheila Tejada},
  TITLE     = {Modeling Web Sources for Information Integration},
  BOOKTITLE = {Fifteenth National Conference on Artificial Intelligence},
  YEAR      = {1998},
  ABSTRACT  = {The Web is based on a browsing paradigm that makes it difficult to retrieve and integrate data from multiple sites. Today, the only way to do this is to build specialized applications, which are time-consuming to develop and difficult to maintain. We are addressing this problem by creating the technology and tools for rapidly constructing information agents that extract, query, and integrate data from web sources. Our approach is based on a simple, uniform representation that makes it efficient ...}
}
@INPROCEEDINGS{Knorr1998,
  AUTHOR    = {Edwin M. Knorr and Raymond T. Ng},
  TITLE     = {Algorithms for Mining Distance-Based Outliers in Large Datasets},
  BOOKTITLE = {24th Int. Conf. Very Large Data Bases, VLDB},
  YEAR      = {1998},
  PAGES     = {392--403},
  ABSTRACT  = {This paper deals with finding outliers (exceptions) in large, multidimensional datasets. The identification of outliers can lead to the discovery of truly unexpected knowledge in areas such as electronic commerce, credit card fraud, and even the analysis of performance statistics of professional athletes. Existing methods that we have seen for finding outliers in large datasets can only deal efficiently with two dimensions/attributes of a dataset. Here, we study the notion of DB- (Distance-...}
}
@INPROCEEDINGS{Koenig2004,
  AUTHOR    = {Nathan Koenig and Andrew Howard},
  TITLE     = {Design and Use Paradigms for Gazebo, An Open-Source Multi-Robot Simulator},
  BOOKTITLE = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
  YEAR      = {2004},
  MONTH     = sep,
  PAGES     = {2149--2154},
  ADDRESS   = {Sendai, Japan},
  OWNER     = {DK},
  TIMESTAMP = {2006.03.06}
}
@INPROCEEDINGS{kohavi96scaling,
  AUTHOR    = {Ron Kohavi},
  TITLE     = {Scaling Up the Accuracy of {N}aive-{B}ayes Classifiers: a Decision-Tree Hybrid},
  BOOKTITLE = {Proceedings of the Second International Conference on Knowledge Discovery and Data Mining},
  YEAR      = {1996},
  PAGES     = {202--207}
}
@ARTICLE{Kohavi2001,
  AUTHOR    = {Ron Kohavi and Foster Provost},
  TITLE     = {Applications of Data Mining to Electronic Commerce},
  JOURNAL   = {Data Mining and Knowledge Discovery},
  YEAR      = {2001},
  VOLUME    = {5},
  NUMBER    = {1--2},
  PAGES     = {5--10},
  ISSN      = {1384-5810},
  PUBLISHER = {Kluwer Academic Publishers}
}
@INPROCEEDINGS{Koller2001,
  AUTHOR    = {Daphne Koller and Brian Milch},
  TITLE     = {Multi-Agent Influence Diagrams for Representing and Solving Games},
  BOOKTITLE = {17th International Joint Conference on Artificial Intelligence (IJCAI)},
  YEAR      = {2001},
  PAGES     = {1027--1034},
  ABSTRACT  = {The traditional representations of games using the extensive form or the strategic (normal) form obscure much of the structure that is present in real-world games. In this paper, we propose a new representation language for general multi-player games -- multi-agent influence diagrams (MAIDs). This representation extends graphical models for probability distributions to a multi-agent decision-making context. MAIDs explicitly encode structure involving the dependence relationships among variables. As a consequence, we can define a notion of strategic relevance of one decision variable to another: D' is strategically relevant to D if, to optimize the decision rule at D, the decision maker needs to take into consideration the decision rule at D'. We provide a sound and complete graphical criterion for determining strategic relevance. We then show how strategic relevance can be used to detect structure in games, allowing a large game to be broken up into a set of interacting smaller games, which can be solved in sequence. We show that this decomposition can lead to substantial savings in the computational cost of finding Nash equilibria in these games.}
}
@INPROCEEDINGS{Koller1997,
  AUTHOR    = {Daphne Koller and Avi Pfeffer},
  TITLE     = {Object-oriented Bayesian networks},
  BOOKTITLE = {the 13th Annual Conference on Uncertainty in AI (UAI)},
  YEAR      = {1997},
  PAGES     = {302--313},
  ADDRESS   = {Providence, Rhode Island},
  ABSTRACT  = {Bayesian networks provide a modeling language and associated inference algorithm for stochastic domains. They have been successfully applied in a variety of medium-scale applications. However, when faced with a large complex domain, the task of modeling using Bayesian networks begins to resemble the task of programming using logical circuits. In this paper, we describe an object-oriented Bayesian network (OOBN) language, which allows complex domains to be described in terms of inter-related objects. We use a Bayesian network fragment to describe the probabilistic relations between the attributes of an object. These attributes can themselves be objects, providing a natural framework for encoding part-of hierarchies. Classes are used to provide a reusable probabilistic model which can be applied to multiple similar objects. Classes also support inheritance of model fragments from a class to a subclass, allowing the common aspects of related classes to be defined only once. Our language has clear declarative semantics: an OOBN can be interpreted as a stochastic functional program, so that it uniquely specifies a probabilistic model. We provide an inference algorithm for OOBNs, and show that much of the structural information encoded by an OOBN---particularly the encapsulation of variables within an object and the reuse of model fragments in different contexts---can also be used to speed up the inference process.}
}
@INPROCEEDINGS{Krishnapuram2003,
  AUTHOR    = {Raghu Krishnapuram and Krishna Prasad Chitrapura and Sachindra Joshi},
  TITLE     = {Classification of Text Documents Based on Minimum System Entropy},
  BOOKTITLE = {ICML 2003},
  YEAR      = {2003},
  PAGES     = {384--391},
  ABSTRACT  = {In this paper, we describe a new approach to classification of text documents based on the minimization of system entropy, i.e., the overall uncertainty associated with the joint distribution of words and labels in the collection. The classification algorithm assigns a class label to a new document in such a way that its insertion into the system results in the maximum decrease (or least increase) in system entropy. We provide insights into the minimum system entropy criterion, and establish connections to traditional naive Bayes approaches. Experimental results indicate that the algorithm performs well in terms of classification accuracy. It is less sensitive to feature selection and more scalable when compared with SVM.}
}
@INPROCEEDINGS{KruegelKMRV05,
  AUTHOR    = {Christopher Kr{\"u}gel and E. Kirda and D. Mutz and W. Robertson and G. Vigna},
  TITLE     = {Automating Mimicry Attacks Using Static Binary Analysis},
  BOOKTITLE = {Proceedings of Security~'05, the 14th USENIX Security Symposium},
  YEAR      = {2005},
  PAGES     = {161--176},
  ADDRESS   = {Baltimore, MD, USA},
  ABSTRACT  = {Intrusion detection systems that monitor sequences of system calls have recently become more sophisticated in defining legitimate application behavior. In particular, additional information, such as the value of the program counter and the configuration of the program's call stack at each system call, has been used to achieve better characterization of program behavior. While there is common agreement that this additional information complicates the task for the attacker, it is less clear to which extent an intruder is constrained. In this paper, we present a novel technique to evade the extended detection features of state-of-the-art intrusion detection systems and reduce the task of the intruder to a traditional mimicry attack. Given a legitimate sequence of system calls, our technique allows the attacker to execute each system call in the correct execution context by obtaining and relinquishing the control of the application's execution flow through manipulation of code pointers. We have developed a static analysis tool for Intel x86 binaries that uses symbolic execution to automatically identify instructions that can be used to redirect control flow and to compute the necessary modifications to the environment of the process. We used our tool to successfully exploit three vulnerable programs and evade detection by existing state-of-the-art system call monitors. In addition, we analyzed three real-world applications to verify the general applicability of our techniques.}
}
@INPROCEEDINGS{kruegel03:syscalls,
  AUTHOR    = {Christopher Kr{\"u}gel and D. Mutz and F. Valeur and G. Vigna},
  TITLE     = {On the Detection of Anomalous System Call Arguments},
  BOOKTITLE = {Proceedings of the 2003 European Symposium on Research in Computer Security},
  YEAR      = {2003},
  MONTH     = oct,
  ADDRESS   = {Gj{\o}vik, Norway}
}
@INPROCEEDINGS{DBLP:conf/acsac/KruegelMRV03,
  AUTHOR    = {Christopher Kr{\"u}gel and Darren Mutz and William Robertson and Fredrik Valeur},
  TITLE     = {Bayesian Event Classification for Intrusion Detection},
  BOOKTITLE = {19th Annual Computer Security Applications Conference (ACSAC 2003), 8-12 December 2003, Las Vegas, NV, USA},
  YEAR      = {2003},
  PAGES     = {14--23},
  PUBLISHER = {IEEE Computer Society},
  BIBSOURCE = {DBLP, http://dblp.uni-trier.de},
  EE        = {http://csdl.computer.org/comp/proceedings/acsac/2003/2041/00/20410014abs.htm},
  ISBN      = {0-7695-2041-3}
}
@ARTICLE{Kullback1951,
  AUTHOR    = {Kullback, S. and Leibler, R. A.},
  TITLE     = {On information and sufficiency},
  JOURNAL   = {Ann. Math. Statist.},
  YEAR      = {1951},
  VOLUME    = {22},
  PAGES     = {79--86}
}
@ARTICLE{Kushilevitz1993,
  AUTHOR    = {Eyal Kushilevitz and Yishay Mansour},
  TITLE     = {Learning Decision Trees using the Fourier Spectrum},
  JOURNAL   = {SIAM Journal on Computing},
  YEAR      = {1993},
  VOLUME    = {22},
  NUMBER    = {6},
  PAGES     = {1331--1348}
}
@INPROCEEDINGS{Kushmerick1997,
  AUTHOR    = {Nickolas Kushmerick and Daniel S. Weld and Robert B. Doorenbos},
  TITLE     = {Wrapper induction for information extraction},
  BOOKTITLE = {Intl. Joint Conference on Artificial Intelligence (IJCAI)},
  YEAR      = {1997},
  PAGES     = {729--737}
}
@ARTICLE{Lam1994,
  AUTHOR    = {Wai Lam and Fahiem Bacchus},
  TITLE     = {Learning {Bayesian} Belief Networks: An Approach Based on the {MDL} Principle},
  JOURNAL   = {Computational Intelligence},
  YEAR      = {1994},
  VOLUME    = {10},
  PAGES     = {269--293}
}
@INPROCEEDINGS{Landwehr2003,
  AUTHOR    = {Niels Landwehr and Mark Hall and Eibe Frank},
  TITLE     = {Logistic Model Trees},
  BOOKTITLE = {ECML 2003},
  YEAR      = {2003},
  PAGES     = {241--252},
  ADDRESS   = {Dubrovnik, Croatia}
}
@INPROCEEDINGS{lang95newsweeder,
  AUTHOR    = {Ken Lang},
  TITLE     = {News{W}eeder: learning to filter netnews},
  BOOKTITLE = {Proceedings of the 12th International Conference on Machine Learning},
  YEAR      = {1995},
  PAGES     = {331--339},
  PUBLISHER = {Morgan Kaufmann Publishers Inc.},
  ADDRESS   = {San Mateo, CA, USA},
  URL       = {citeseer.ist.psu.edu/lang95newsweeder.html}
}
@INPROCEEDINGS{Langley1993,
  AUTHOR    = {Pat Langley},
  TITLE     = {Induction of Recursive Bayesian Classifiers},
  BOOKTITLE = {ECML '93: Proceedings of the European Conference on Machine Learning},
  YEAR      = {1993},
  PAGES     = {153--164},
  PUBLISHER = {Springer-Verlag},
  ADDRESS   = {London, UK},
  ISBN      = {3-540-56602-3}
}
@INPROCEEDINGS{langley92analysis,
  AUTHOR    = {Pat Langley and Wayne Iba and Kevin Thompson},
  TITLE     = {An Analysis of Bayesian Classifiers},
  BOOKTITLE = {National Conference on Artificial Intelligence},
  YEAR      = {1992},
  PAGES     = {223--228},
  URL       = {citeseer.ist.psu.edu/langley92analysis.html}
}
@INPROCEEDINGS{langley94,
  AUTHOR    = {Pat Langley and Stephanie Sage},
  TITLE     = {Induction of Selective Bayesian Classifiers},
  BOOKTITLE = {Proceedings of the 10th Annual Conference on Uncertainty in Artificial Intelligence (UAI-94)},
  YEAR      = {1994},
  PAGES     = {399--406},
  PUBLISHER = {Morgan Kaufmann Publishers},
  ADDRESS   = {San Francisco, CA}
}
@INPROCEEDINGS{Lauser2003,
  AUTHOR    = {Boris Lauser and Andreas Hotho},
  TITLE     = {Automatic multi-label subject indexing in a multilingual environment},
  BOOKTITLE = {Proc. of the 7th European Conference in Research and Advanced Technology for Digital Libraries, ECDL 2003},
  YEAR      = {2003},
  SERIES    = {Lecture Notes in Computer Science},
  VOLUME    = {2769},
  PAGES     = {140--151},
  PUBLISHER = {Springer}
}
@INPROCEEDINGS{lee98data,
  AUTHOR    = {Wenke Lee and Salvatore Stolfo},
  TITLE     = {Data mining approaches for intrusion detection},
  BOOKTITLE = {Proceedings of the 7th {USENIX} Security Symposium},
  YEAR      = {1998},
  ADDRESS   = {San Antonio, TX},
  URL       = {citeseer.ist.psu.edu/article/lee98data.html}
}
@INPROCEEDINGS{Lee1999,
  AUTHOR    = {Wenke Lee and Salvatore J. Stolfo and Kui W. Mok},
  TITLE     = {A Data Mining Framework for Building Intrusion Detection Models},
  BOOKTITLE = {{IEEE} Symposium on Security and Privacy},
  YEAR      = {1999},
  PAGES     = {120--132},
  URL       = {citeseer.ist.psu.edu/article/lee99data.html}
}
@INPROCEEDINGS{2002-leiva,
  AUTHOR    = {H. Leiva and V. Honavar},
  TITLE     = {Experiments With {MRDTL} -- A Multi-Relational Decision Tree Learning Algorithm},
  BOOKTITLE = {Workshop on Multi-Relational Data Mining in conjunction with The Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (MRDM02)},
  YEAR      = {2002},
  MONTH     = jul,
  EDITOR    = {Sa{\v{s}}o D{\v{z}}eroski and Luc De Raedt and Stefan Wrobel},
  PAGES     = {97--112},
  PUBLISHER = {University of Alberta, Edmonton, Canada},
  KEYWORDS  = {Data_Mining},
  PUBTYPE   = {4},
  URL       = {http://www-ai.ijs.si/SasoDzeroski/MRDM2002/}
}
@MASTERSTHESIS{Leiva2002,
  AUTHOR    = {Hector Ariel Leiva},
  TITLE     = {{MRDTL}: A multi-relational decision tree learning algorithm},
  SCHOOL    = {Iowa State University},
  YEAR      = {2002},
  TYPE      = {Master's Thesis},
  ABSTRACT  = {In this paper, we have described an implementation of multi-relational decision tree learning (MRDTL) algorithm based on the techniques proposed by Knobbe et al. (Knobbe et al., 1999a, Knobbe et al., 1999b)}
}
@ARTICLE{Leonard1992,
  AUTHOR    = {John J. Leonard and Hugh F. Durrant-Whyte and Ingemar J. Cox},
  TITLE     = {Dynamic map building for an autonomous mobile robot},
  JOURNAL   = {Int. J. Rob. Res.},
  YEAR      = {1992},
  VOLUME    = {11},
  NUMBER    = {4},
  PAGES     = {286--298},
  PUBLISHER = {Sage Publications, Inc.},
  ADDRESS   = {Thousand Oaks, CA, USA},
  ISSN      = {0278-3649}
}
@INPROCEEDINGS{Lerman2001,
  AUTHOR    = {Kristina Lerman and Craig Knoblock and Steven Minton},
  TITLE     = {Automatic Data Extraction from Lists and Tables in Web Sources},
  BOOKTITLE = {Automatic Text Extraction and Mining workshop (ATEM-01) of IJCAI-01},
  YEAR      = {2001},
  PAGES     = {268--281},
  ADDRESS   = {Seattle, WA},
  OWNER     = {dkkang},
  TIMESTAMP = {2006.06.08}
}
@INPROCEEDINGS{Leslie2002a,
  AUTHOR    = {Christina Leslie and Eleazar Eskin and William Stafford Noble},
  TITLE     = {The Spectrum Kernel: A String Kernel for SVM Protein Classification},
  BOOKTITLE = {Proceedings of the Pacific Symposium on Biocomputing 2002 (PSB 2002)},
  YEAR      = {2002},
  PAGES     = {564--575}
}
@INPROCEEDINGS{Leslie2002b,
  AUTHOR    = {Christina Leslie and Eleazar Eskin and Jason Weston and William Stafford Noble},
  TITLE     = {Mismatch String Kernels for SVM Protein Classification},
  BOOKTITLE = {Neural Information Processing Systems 2002 (NIPS 2002)},
  YEAR      = {2002}
}
@ARTICLE{Lewis2004,
  AUTHOR    = {David D. Lewis and Yiming Yang and Tony G. Rose and Fan Li},
  TITLE     = {{RCV1}: A New Benchmark Collection for Text Categorization Research},
  JOURNAL   = {J. Mach. Learn. Res.},
  YEAR      = {2004},
  VOLUME    = {5},
  PAGES     = {361--397},
  PUBLISHER = {MIT Press},
  ISSN      = {1533-7928}
}
@PHDTHESIS{Lewis1992,
  AUTHOR    = {David Dolan Lewis},
  TITLE     = {Representation and learning in information retrieval},
  SCHOOL    = {University of Massachusetts},
  YEAR      = {1992},
  ADDRESS   = {Amherst, MA, USA},
  PUBLISHER = {University of Massachusetts},
  ORDER_NO  = {UMI Order No. GAX92-19460}
}
@BOOK{ming93introduction,
  AUTHOR    = {Li, Ming and Vit{\'a}nyi, Paul M. B.},
  TITLE     = {An Introduction to Kolmogorov Complexity and Its Applications},
  PUBLISHER = {Springer-Verlag},
  ADDRESS   = {Berlin},
  YEAR      = {1993},
  URL       = {citeseer.ist.psu.edu/li97introduction.html}
}
@INPROCEEDINGS{Liao2002,
  AUTHOR    = {Yihua Liao and V. Rao Vemuri},
  TITLE     = {Using Text Categorization Techniques for Intrusion Detection},
  BOOKTITLE = {Proceedings of the 11th USENIX Security Symposium},
  YEAR      = {2002},
  PAGES     = {51--59},
  PUBLISHER = {USENIX Association},
  ADDRESS   = {Berkeley, CA, USA},
  ISBN      = {1-931971-00-5}
}
@INPROCEEDINGS{LippmannCFGKWZ99,
  AUTHOR    = {Richard Lippmann and Robert K. Cunningham and David J. Fried and Isaac Graf and Kris R. Kendall and Seth E. Webster and Marc A. Zissman},
  TITLE     = {Results of the DARPA 1998 Offline Intrusion Detection Evaluation},
  BOOKTITLE = {Recent Advances in Intrusion Detection},
  YEAR      = {1999},
  OWNER     = {DK},
  TIMESTAMP = {2006.05.16}
}
@INPROCEEDINGS{liu2005,
  AUTHOR    = {Alexander Liu and Cheryl Martin and Tom Hetherington and Sara Matzner},
  TITLE     = {A Comparison of System Call Feature Representations for Insider Threat Detection},
  BOOKTITLE = {Proceedings of 6th IEEE Systems Man and Cybernetics Information Assurance Workshop (IAW)},
  YEAR      = {2005},
  ADDRESS   = {West Point, NY, USA}
}
@ARTICLE{Lodhi2002,
  AUTHOR    = {Huma Lodhi and Craig Saunders and John Shawe-Taylor and Nello Cristianini and Chris Watkins},
  TITLE     = {Text classification using string kernels},
  JOURNAL   = {The Journal of Machine Learning Research},
  YEAR      = {2002},
  VOLUME    = {2},
  PAGES     = {419--444},
  ABSTRACT  = {We propose a novel approach for categorizing text documents based on the use of a special kernel. The kernel is an inner product in the feature space generated by all subsequences of length k. A subsequence is any ordered sequence of k characters occurring in the text though not necessarily contiguously. The subsequences are weighted by an exponentially decaying factor of their full length in the text, hence emphasising those occurrences that are close to contiguous. A direct computation of this feature vector would involve a prohibitive amount of computation even for modest values of k, since the dimension of the feature space grows exponentially with k. The paper describes how despite this fact the inner product can be efficiently evaluated by a dynamic programming technique. Experimental comparisons of the performance of the kernel compared with a standard word feature space kernel (Joachims, 1998) show positive results on modestly sized datasets. The case of contiguous subsequences is also considered for comparison with the subsequences kernel with different decay factors. For larger documents and datasets the paper introduces an approximation technique that is shown to deliver good approximations efficiently for large datasets.}
}
@INPROCEEDINGS{Lu2003,
  AUTHOR    = {Qing Lu and Lise Getoor},
  TITLE     = {Link-based Classification},
  BOOKTITLE = {ICML 2003},
  YEAR      = {2003},
  PAGES     = {496--503},
  ABSTRACT  = {A key challenge for machine learning is tackling the problem of mining richly structured data sets, where the objects are linked in some way due to either an explicit or implicit relationship that exists between the objects. Links among the objects demonstrate certain patterns, which can be helpful for many machine learning tasks and are usually hard to capture with traditional statistical models. Recently there has been a surge of interest in this area, fueled largely by interest in web and hypertext mining, but also by interest in mining social networks, bibliographic citation data, epidemiological data and other domains best described using a linked or graph structure. In this paper we propose a framework for modeling link distributions, a link-based model that supports discriminative models describing both the link distributions and the attributes of linked objects. We use a structured logistic regression model, capturing both content and links. We systematically evaluate several variants of our link-based model on a range of data sets including both web and citation collections. In all cases, the use of the link distribution improves classification accuracy.}
}
@ARTICLE{MacKay1994,
  AUTHOR    = {David J. C. MacKay},
  TITLE     = {Bayesian Non-linear Modeling for the Energy Prediction Competition},
  JOURNAL   = {ASHRAE Transactions},
  YEAR      = {1994},
  VOLUME    = {100},
  NUMBER    = {2},
  PAGES     = {1053--1062},
  ABSTRACT  = {Bayesian probability theory provides a unifying framework for data modeling. A model space may include numerous control parameters which influence the complexity of the model (for example regularisation constants). Bayesian methods can automatically set such parameters so that the model becomes probabilistically well-matched to the data. The 1993 energy prediction competition involved the prediction of a series of building energy loads from a series of environmental input variables. Non-linear regression using `neural networks' is a popular technique for such modeling tasks. Since it is not obvious how large a time-window of inputs is appropriate, or what preprocessing of inputs is best, this can be viewed as a regression problem in which there are many possible input variables, some of which may actually be irrelevant to the prediction of the output variable. Because a finite data set will show random correlations between the irrelevant inputs and the output, any conventional neural network (even with `weight decay') will not set the coefficients for these junk inputs to zero. Thus the irrelevant variables will hurt the model's performance. The Automatic Relevance Determination (ARD) model puts a prior over the regression parameters which embodies the concept of relevance. This is done in a simple and `soft' way by introducing multiple `weight decay' constants, one `alpha' associated with each input. Using Bayesian methods, the decay rates for junk inputs are automatically inferred to be large, preventing those inputs from causing significant overfitting. An entry using the ARD model won the prediction competition by a significant margin.}
}
@INCOLLECTION{MacKay2003,
  AUTHOR    = {David J. C. MacKay and Linda C. Bauman Peto},
  TITLE     = {Model Comparison and Occam's Razor},
  BOOKTITLE = {Information theory, inference and learning algorithms},
  PUBLISHER = {Cambridge University Press},
  YEAR      = {2003}
}
@ARTICLE{MacKay1995,
  AUTHOR    = {David J. C. MacKay and Linda C. Bauman Peto},
  TITLE     = {A Hierarchical Dirichlet Language Model},
  JOURNAL   = {Natural Language Engineering},
  YEAR      = {1995},
  VOLUME    = {1},
  NUMBER    = {3},
  PAGES     = {1--19}
}
@ARTICLE{MacKenzie1997,
  AUTHOR    = {Doug MacKenzie and Ronald C. Arkin and Jonathan Cameron},
  TITLE     = {Multiagent mission specification and execution},
  JOURNAL   = {Autonomous Robots},
  YEAR      = {1997},
  VOLUME    = {4},
  NUMBER    = {1},
  PAGES     = {29--52},
  BOOKTITLE = {Autonomous Robots},
  OWNER     = {DK},
  TIMESTAMP = {2006.03.07}
}
@INPROCEEDINGS{Maedche2003,
  AUTHOR    = {Alexander Maedche and G{\"u}nter Neumann and Steffen Staab},
  TITLE     = {Bootstrapping an ontology-based information extraction system},
  BOOKTITLE = {Intelligent exploration of the web},
  YEAR      = {2003},
  PAGES     = {345--359},
  PUBLISHER = {Physica-Verlag GmbH},
  ADDRESS   = {Heidelberg, Germany},
  ABSTRACT  = {Automatic intelligent web exploration will benefit from shallow information extraction techniques if the latter can be brought to work within many different domains. The major bottleneck for this, however, lies in the so far difficult and expensive modeling of lexical knowledge, extraction rules, and an ontology that together define the information extraction system. In this paper we present a bootstrapping approach that allows for the fast creation of an ontology-based information extracting system relying on several basic components, viz. a core information extraction system, an ontology engineering environment and an inference engine. We make extensive use of machine learning techniques to support the semi-automatic, incremental bootstrapping of the domain-specific target information extraction system.}
}
@TECHREPORT{Maedche2001,
  AUTHOR      = {Alexander Maedche and Steffen Staab},
  TITLE       = {Comparing Ontologies --- Similarity Measures and a Comparison Study},
  INSTITUTION = {Institute AIFB, University of Karlsruhe},
  YEAR        = {2001},
  NUMBER      = {408}
}
@INPROCEEDINGS{Maedche2000,
  AUTHOR    = {Alexander Maedche and Steffen Staab},
  TITLE     = {Discovering Conceptual Relations from Text},
  BOOKTITLE = {European Conference on Artificial Intelligence (ECAI 2000)},
  YEAR      = {2000},
  PAGES     = {321--325},
  ADDRESS   = {Berlin}
}
@INCOLLECTION{Mansour1994,
  AUTHOR    = {Yishay Mansour},
  TITLE     = {Learning Boolean Functions via the Fourier Transform},
  BOOKTITLE = {Theoretical Advances in Neural Computation and Learning},
  PUBLISHER = {Kluwer},
  YEAR      = {1994},
  EDITOR    = {Vwani Roychowdhury and Kai-Yeung Siu and Alon Orlitsky},
  PAGES     = {391--424}
}
@ARTICLE{Markovitch2002,
  AUTHOR    = {Shaul Markovitch and Dan Rosenstein},
  TITLE     = {Feature Generation Using General Constructor Functions},
  JOURNAL   = {Machine Learning},
  YEAR      = {2002},
  VOLUME    = {49},
  NUMBER    = {1},
  PAGES     = {59--98}
}
@INPROCEEDINGS{Marrakchi2000,
  AUTHOR    = {Zakia Marrakchi and Ludovic M{\'e} and Bernard Vivinis and Benjamin Morin},
  TITLE     = {Flexible Intrusion Detection Using Variable-Length Behavior Modeling in Distributed Environment: Application to CORBA Objects},
  BOOKTITLE = {RAID '00: Proceedings of the Third International Workshop on Recent Advances in Intrusion Detection},
  YEAR      = {2000},
  PAGES     = {130--144},
  PUBLISHER = {Springer-Verlag},
  ADDRESS   = {London, UK},
  ISBN      = {3-540-41085-6}
}
@INPROCEEDINGS{Maxion2002,
  AUTHOR    = {Roy A. Maxion and Tahlia N. Townsend},
  TITLE     = {Masquerade Detection Using Truncated Command Lines},
  BOOKTITLE = {DSN '02: Proceedings of the 2002 International Conference on Dependable Systems and Networks},
  YEAR      = {2002},
  PAGES     = {219--228},
  PUBLISHER = {IEEE Computer Society},
  ADDRESS   = {Washington, DC, USA},
  ISBN      = {0-7695-1597-5}
}
@INPROCEEDINGS{McCallum2003,
  AUTHOR    = {Andrew McCallum and David Jensen},
  TITLE     = {A Note on the Unification of Information Extraction and Data Mining using Conditional-Probability, Relational Models},
  BOOKTITLE = {IJCAI'03 Workshop on Learning Statistical Models from Relational Data},
  YEAR      = {2003},
  ABSTRACT  = {Although information extraction and data mining appear together in many applications, their interface in most current systems would better be described as serial juxtaposition than as tight integration. Information extraction populates slots in a database by identifying relevant subsequences of text, but is usually not aware of the emerging patterns and regularities in the database. Data mining methods begin from a populated database, and are often unaware of where the data came from, or its inherent uncertainties. The result is that the accuracy of both suffers, and significant mining of complex text sources is beyond reach. This position paper proposes the use of unified, relational, undirected graphical models for information extraction and data mining, in which extraction decisions and data-mining decisions are made in the same probabilistic "currency," with a common inference procedure---each component thus being able to make up for the weaknesses of the other and therefore improving the performance of both. For example, data mining run on a partially-filled database can find patterns that provide "top-down" accuracy-improving constraints to information extraction. Information extraction can provide a much richer set of "bottom-up" hypotheses to data mining if the mining is set up to handle additional uncertainty information from extraction. We outline an approach and describe several models, but provide no experimental results.}
}
@INPROCEEDINGS{mccallum98comparison,
  AUTHOR    = {Andrew McCallum and Kamal Nigam},
  TITLE     = {A comparison of event models for Naive Bayes text classification},
  BOOKTITLE = {AAAI-98 Workshop on Learning for Text Categorization},
  YEAR      = {1998},
  TEXT      = {A. McCallum and K. Nigam. A comparison of event models for Naive Bayes text classification},
  URL       = {citeseer.ist.psu.edu/mccallum98comparison.html}
}
@INPROCEEDINGS{mccallum98improving,
  AUTHOR    = {Andrew K. McCallum and Ronald Rosenfeld and Tom M. Mitchell and Andrew Y. Ng},
  TITLE     = {Improving text classification by shrinkage in a hierarchy of classes},
  BOOKTITLE = {Proceedings of {ICML}-98, 15th International Conference on Machine Learning},
  YEAR      = {1998},
  EDITOR    = {Jude W. Shavlik},
  PAGES     = {359--367},
  PUBLISHER = {Morgan Kaufmann Publishers, San Francisco, US},
  ADDRESS   = {Madison, US},
  URL       = {citeseer.ist.psu.edu/mccallum98improving.html}
}
@incollection{McCHay69,
  author = {John McCarthy and Patrick J. Hayes},
  title = {Some Philosophical Problems from the Standpoint of Artificial Intelligence},
  editor = {B. Meltzer and D. Michie},
  booktitle = {Machine Intelligence 4},
  pages = {463--502},
  publisher = {Edinburgh University Press},
  year = {1969},
  note = {reprinted in McC90},
}
@inproceedings{McGovern2003,
  author = {Amy McGovern and David Jensen},
  title = {Identifying Predictive Structures in Relational Data Using Multiple Instance Learning},
  booktitle = {ICML 2003},
  year = {2003},
  pages = {528--535},
}
@article{McHugh1997,
  author = {Jason McHugh and Serge Abiteboul and Roy Goldman and Dallan Quass and Jennifer Widom},
  title = {Lore: A Database Management System for Semistructured Data},
  journal = {SIGMOD Record},
  year = {1997},
  volume = {26},
  number = {3},
  pages = {54--66},
  abstract = {Lore (for Lightweight Object Repository) is a DBMS designed specifically for managing semistructured information. Implementing Lore has required rethinking all aspects of a DBMS, including storage management, indexing, query processing and optimization, and user interfaces. This paper provides an overview of these aspects of the Lore system, as well as other novel features such as dynamic structural summaries and seamless access to data from external sources. 1 Introduction Traditional database ...},
}
@inproceedings{McKeown1984,
  author = {D. M. McKeown and J. L. Denlinger},
  title = {Map-Guided Feature Extraction from Aerial Imagery},
  booktitle = {Proceedings of Second IEEE Computer Society Workshop on Computer Vision: Representation and Control},
  year = {1984},
  month = may,
  pages = {205--213},
  address = {Annapolis, Maryland},
}
@inproceedings{Mehra1991,
  author = {Pankaj Mehra and Larry A. Rendell and Benjamin W. Wah},
  title = {Principled Constructive Induction},
  booktitle = {IJCAI 1991},
  year = {1991},
  abstract = {A framework for the construction of new features for hard classification tasks is discussed. The approach brings together ideas from the fields of machine learning, computational geometry, and pattern recognition. Two heuristics for evaluation of newly-constructed features are proposed, and their statistical significance verified. Finally, it is shown how the proposed framework can be used to combine techniques for selection of representative examples with techniques for construction of new...},
}
@techreport{Mettu1997,
  author = {Ramgopal Mettu and Yuke Zhao and Vijaya Ramachandran},
  title = {Experimental Evaluation and Comparison of Algorithms for Incremental Graph Biconnectivity},
  institution = {University of Texas at Austin},
  number = {CS-TR-97-17b},
  year = {1997},
  month = jan,
  abstract = {We describe our implementation of an algorithm to maintain the connected components and the biconnected components of a graph where vertex and edge insertions are allowed. Algorithms for this problem can be applied to task decomposition in engineering design. Connected components are maintained using a disjoint set data structure and the biconnected components are maintained by a block forest. We implemented an incremental biconnectivity algorithm presented in Westbrook and Tarjan [8] which...},
}
@inproceedings{Mishra2001,
  author = {Nina Mishra and Dan Oblinger and Leonard Pitt},
  title = {Sublinear time approximate clustering},
  booktitle = {twelfth annual ACM-SIAM symposium on Discrete algorithms},
  year = {2001},
  pages = {439--447},
  address = {Washington, D.C., United States},
  publisher = {Society for Industrial and Applied Mathematics, Philadelphia, PA, USA},
  abstract = {Clustering is of central importance in a number of disciplines including Machine Learning, Statistics, and Data Mining. This paper has two foci: (1) It describes how existing algorithms for clustering can benefit from simple sampling techniques arising from work in statistics [Pol84]. (2) It motivates and introduces a new model of clustering that is in the spirit of the ``PAC (probably approximately correct)'' learning model, and gives examples of efficient PAC-clustering algorithms.},
}
@book{Mitchell1997,
  author = {Tom M. Mitchell},
  title = {Machine Learning},
  publisher = {McGraw-Hill},
  address = {New York},
  year = {1997},
  keywords = {machine learning, honours reading},
}
@inproceedings{Montemerlo02a,
  author = {Montemerlo, M. and Thrun, S. and Koller, D. and Wegbreit, B.},
  title = {{FastSLAM}: {A} Factored Solution to the Simultaneous Localization and Mapping Problem},
  booktitle = {Proceedings of the AAAI National Conference on Artificial Intelligence},
  publisher = {AAAI},
  address = {Edmonton, Canada},
  year = {2002},
}
@inproceedings{Moutarlier1989,
  author = {P. Moutarlier and R. Chatila},
  title = {An experimental system for incremental environment modeling by an autonomous mobile robot},
  booktitle = {1st International Symposium on Experimental Robotics},
  year = {1989},
  month = jun,
  address = {Montreal, Canada},
}
@article{muggleton94inductive,
  author = {Stephen Muggleton and De Raedt, Luc},
  title = {Inductive Logic Programming: Theory and Methods},
  journal = {Journal of Logic Programming},
  year = {1994},
  volume = {19/20},
  pages = {629--679},
  url = {http://citeseer.csail.mit.edu/muggleton94inductive.html},
}
@article{Mukherjee1994,
  author = {B. Mukherjee and H. Heberlein and K. Levitt},
  title = {Network Intrusion Detection},
  journal = {IEEE Network},
  volume = {8},
  number = {3},
  pages = {26--41},
  year = {1994},
}
@article{Mukkamala2003,
  author = {S. Mukkamala and A. Sung},
  title = {Feature Selection for Intrusion Detection Using Neural Networks and Support Vector Machines},
  journal = {Journal of the Transportation Research Board},
  publisher = {National Academies},
  year = {2003},
}
@inproceedings{Murphy1999,
  author = {Kevin P. Murphy and Yair Weiss and Michael I. Jordan},
  title = {Loopy belief propagation for approximate inference: an empirical study},
  booktitle = {the Fifteenth Conference on Uncertainty in Artificial Intelligence (UAI)},
  year = {1999},
  abstract = {Recently, researchers have demonstrated that "loopy belief propagation" --- the use of Pearl's polytree algorithm in a Bayesian network with loops --- can perform well in the context of error-correcting codes. The most dramatic instance of this is the near Shannon-limit performance of "Turbo Codes" --- codes whose decoding algorithm is equivalent to loopy belief propagation in a chain-structured Bayesian network. In this paper we ask: is there something special about the...},
}
@inproceedings{Muslea1998,
  author = {Ion Muslea and Steve Minton and Craig Knoblock},
  title = {Wrapper induction for semistructured Web-based information sources},
  booktitle = {Workshop on Management of Semistructured Data},
  address = {Tucson, Arizona},
  year = {1998},
  abstract = {Central to any information mediator that accesses Web-based information sources is a set of wrappers that extract relevant data from Web pages. We introduce stalker, a wrapperinduction algorithm that generates extraction rules for semistructured, Web-based information sources. stalker generates extraction rules that are expressed as simple landmark grammars, which are a class of finite automata that is more expressive than the existing extraction languages. Based on just a few training...},
}
@inproceedings{Muslea1998aaai,
  author = {Ion Muslea and Steve Minton and Craig Knoblock},
  title = {{STALKER}: Learning Extraction Rules for Semistructured, Web-based Information Sources},
  booktitle = {AAAI-98 Workshop on AI and Information Integration},
  year = {1998},
  address = {Menlo Park, CA},
  abstract = {Information mediators are systems capable of providing a unified view of several information sources. Central to any mediator that accesses Web-based sources is a set of wrappers that can extract relevant information from Web pages. In this paper, we present a wrapper-induction algorithm that generates extraction rules for Web-based information sources. We introduce landmark automata, a formalism that describes classes of extraction rules. Our wrapper induction algorithm, stalker, generates...},
}
% Nagao1980 is a monograph published by Plenum Press, so it is typed as a book;
% the former "journal" value is the book series it appeared in.
% NOTE(review): assumes M. Nadler is the series editor and that 1 is the
% series volume number -- confirm against the printed book.
@book{Nagao1980,
  author = {M. Nagao and T. Matsuyama},
  title = {A Structural Analysis of Complex Aerial Photographs},
  series = {Advanced Applications in Pattern Recognition},
  volume = {1},
  publisher = {Plenum Press},
  year = {1980},
  pages = {1--199},
  serieseditor = {M. Nadler},
}
@incollection{Nelson1983,
  author = {K. Nelson},
  title = {The derivation of concepts and categories from event representations},
  editor = {E. K. Scholnick},
  booktitle = {New Trends in Conceptual Representations: Challenges to Piaget's Theory?},
  publisher = {Lawrence Erlbaum},
  address = {Hillsdale, NJ, USA},
  year = {1983},
}
@inproceedings{Nestorov1999,
  author = {Svetlozar Nestorov},
  title = {Integrating data mining with relational {DBMS}: A tightly-coupled approach},
  booktitle = {4th Workshop on Next Generation Information Technologies and Systems, NGITS '99},
  year = {1999},
}
@inproceedings{Nestorov1998,
  author = {Svetlozar Nestorov and Serge Abiteboul and Rajeev Motwani},
  title = {Extracting Schema from Semistructured Data},
  booktitle = {the 1998 ACM SIGMOD international conference on Management of data},
  year = {1998},
  pages = {295--306},
  address = {Seattle, Washington, United States},
  abstract = {Semistructured data is characterized by the lack of any fixed and rigid schema, although typically the data has some implicit structure. While the lack of fixed schema makes extracting semistructured data fairly easy and an attractive goal, presenting and querying such data is greatly impaired. Thus, a critical problem is the discovery of the structure implicit in semistructured data and, subsequently, the recasting of the raw data in terms of this structure. In this paper, we consider a very general form of semistructured data based on labeled, directed graphs. We show that such data can be typed using the greatest fixpoint semantics of monadic datalog programs. We present an algorithm for approximate typing of semistructured data. We establish that the general problem of finding an optimal such typing is NP-hard, but present some heuristics and techniques based on clustering that allow efficient and near-optimal treatment of the problem. We also present some preliminary experimental results.},
}
@article{Nestorov1997,
  author = {Svetlozar Nestorov and Serge Abiteboul and Rajeev Motwani},
  title = {Inferring Structure in Semistructured Data},
  journal = {SIGMOD Record},
  year = {1997},
  volume = {26},
  number = {4},
  pages = {39--43},
}
@inproceedings{Nestorov1997icde,
  author = {Svetlozar Nestorov and Jeffrey Ullman and Janet Wiener and Sudarshan Chawathe},
  title = {Representative Objects: Concise Representations of Semistructured, Hierarchical Data},
  booktitle = {the Thirteenth International Conference on Data Engineering},
  address = {Birmingham, England},
  year = {1997},
}
@inproceedings{Neuenschwander1995,
  author = {W. Neuenschwander and P. Fua and G. Szekely and O. K{\"u}bler},
  title = {From Ziplock Snakes to {Velcro} Surfaces},
  booktitle = {Ascona Workshop on Automatic Extraction of Man-Made Objects from Aerial and Space Images},
  year = {1995},
  pages = {105--114},
  publisher = {Birkh{\"a}user Verlag},
}
@inproceedings{Neumann1997,
  author = {Gunter Neumann and Rolf Backofen and Judith Baur and Marcus Becker and Christian Braun},
  title = {An information extraction core system for real world {German} text processing},
  booktitle = {Proc. of 5th ANLP},
  year = {1997},
}
@inproceedings{Neville2003,
  author = {Jennifer Neville and David Jensen},
  title = {Collective classification with relational dependency networks},
  booktitle = {the 2nd Multi-Relational Data Mining Workshop, 9th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year = {2003},
  abstract = {Collective classification models exploit the dependencies in a network of objects to improve predictions. For example, in a network of web pages, the topic of a page may depend on the topics of hyperlinked pages. A relational model capable of expressing and reasoning with such dependencies should achieve superior performance to relational models that ignore such dependencies. In this paper, we present relational dependency networks (RDNs), extending recent work in dependency networks to a relational setting. RDNs are a collective classification model that offers simple parameter estimation and efficient structure learning. On two real-world data sets, we compare RDNs to ordinary classification with relational probability trees and show that collective classification improves performance.},
}
@book{Newcombe2000,
  author = {Nora S. Newcombe and Janellen Huttenlocher},
  title = {Making space: the development of spatial representation and reasoning},
  publisher = {MIT Press},
  address = {Cambridge, Mass.},
  year = {2000},
  owner = {dkkang},
  timestamp = {2005.11.23},
}
@inproceedings{Nigam1999,
  author = {Kamal Nigam and John Lafferty and Andrew McCallum},
  title = {Using Maximum Entropy for Text Classification},
  booktitle = {IJCAI'99 Workshop on Information Filtering},
  year = {1999},
  abstract = {This paper proposes the use of maximum entropy techniques for text classification. Maximum entropy is a probability distribution estimation technique widely used for a variety of natural language tasks, such as language modeling, part-of-speech tagging, and text segmentation. The underlying principle of maximum entropy is that without external knowledge, one should prefer distributions that are uniform. Constraints on the distribution, derived from labeled training data, inform the technique...},
}
@inproceedings{Nisan1563,
  author = {Noam Nisan},
  title = {Algorithms for Selfish Agents},
  booktitle = {the 16th Annual Symposium on Theoretical Aspects of Computer Science},
  volume = {1563},
  pages = {1--15},
  year = {1999},
  abstract = {This paper considers algorithmic problems in a distributed setting where the participants cannot be assumed to follow the algorithm but rather their own self-interest. Such scenarios arise, in particular, when computers or users aim to cooperate or trade over the Internet. As such participants, termed agents, are capable of manipulating the algorithm, the algorithm designer should ensure in advance that the agents' interests are best served by behaving correctly.},
}
@article{Regan2001,
  author = {J. Kevin O'Regan and Alva No{\"e}},
  title = {A Sensorimotor Account of Vision and Visual Consciousness},
  journal = {Behavioral and Brain Sciences},
  year = {2001},
  volume = {24},
  pages = {939--1031},
}
@techreport{Page1998,
  author = {Lawrence Page and Sergey Brin and Rajeev Motwani and Terry Winograd},
  title = {The {PageRank} Citation Ranking: Bringing Order to the Web},
  institution = {Stanford University},
  year = {1998},
  note = {Stanford Digital Library Technologies Project},
  abstract = {The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a method for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large...},
}
@book{Pap1995,
  author = {E. Pap},
  title = {Non-additive Set Functions},
  publisher = {Kluwer Academic Publishers},
  address = {Dordrecht},
  year = {1995},
}
@article{DBLP:journals/bioinformatics/ParkK03,
  author = {Keun-Joon Park and Minoru Kanehisa},
  title = {Prediction of protein subcellular locations by support vector machines using compositions of amino acids and amino acid pairs},
  journal = {Bioinformatics},
  year = {2003},
  volume = {19},
  number = {13},
  pages = {1656--1663},
  bibsource = {DBLP, http://dblp.uni-trier.de},
}
@article{Parsons2002,
  author = {Simon Parsons and Michael Wooldridge},
  title = {Game Theory and Decision Theory in Multi-Agent Systems},
  journal = {Autonomous Agents and Multi-Agent Systems},
  year = {2002},
  volume = {5},
  number = {3},
  pages = {243--254},
  publisher = {Kluwer Academic Publishers},
  address = {Hingham, MA, USA},
  doi = {10.1023/A:1015575522401},
}
@techreport{Payne2005,
  author = {David L. Payne and Kenneth C. Hoffman and Richard D. Flournoy and Christopher D. Knouss and Keith W. Miller and Kangmin Zheng},
  title = {Simulation Over Geographic Information System ({SOGIS}) Web Service},
  institution = {The MITRE Corporation},
  number = {05-0089},
  year = {2005},
  month = feb,
}
@inproceedings{Pazienza2003,
  author = {Maria Teresa Pazienza and Armando Stellato and Michele Vindigni},
  title = {Combining ontological knowledge and wrapper induction techniques into an e-retail system},
  booktitle = {2003 Workshop on Adaptive Text Extraction and Mining in ECML/PKDD},
  year = {2003},
  month = sep,
  owner = {dkkang},
  timestamp = {2006.06.12},
}
@inproceedings{pazzani97,
  author = {Michael Pazzani},
  title = {Searching for dependencies in {Bayesian} classifiers},
  booktitle = {Artificial Intelligence and Statistics IV, Lecture Notes in Statistics},
  publisher = {Springer-Verlag},
  address = {New York},
  year = {1997},
}
@article{Pazzani1992,
  author = {Michael Pazzani and Dennis Kibler},
  title = {The role of prior knowledge in inductive learning},
  journal = {Machine Learning},
  year = {1992},
  volume = {9},
  pages = {54--97},
}
@inproceedings{pazzani97beyond,
  author = {Michael J. Pazzani and Subramani Mani and William Rodman Shankle},
  title = {Beyond Concise and Colorful: Learning Intelligible Rules},
  booktitle = {Knowledge Discovery and Data Mining},
  year = {1997},
  pages = {235--238},
  url = {http://citeseer.ist.psu.edu/article/pazzani97beyond.html},
}
@book{Pearl2000,
  author = {Judea Pearl},
  title = {Causality: models, reasoning, and inference},
  publisher = {Cambridge University Press},
  address = {New York, NY, USA},
  year = {2000},
  isbn = {0-521-77362-8},
}
@article{Penfield1938,
  author = {Wilder Penfield},
  title = {The cerebral cortex of man},
  journal = {Archives of Neurology and Psychiatry},
  year = {1938},
  volume = {40},
  number = {3},
  owner = {DK},
  timestamp = {2005.11.26},
}
@inproceedings{PengS03,
  author = {Fuchun Peng and Dale Schuurmans},
  title = {Combining {Naive} {Bayes} and n-Gram Language Models for Text Classification},
  editor = {Fabrizio Sebastiani},
  booktitle = {Advances in Information Retrieval, 25th European Conference on IR Research (ECIR 2003)},
  series = {Lecture Notes in Computer Science},
  volume = {2633},
  pages = {335--350},
  publisher = {Springer},
  year = {2003},
  month = apr,
}
% Same paper as entry Pereira1993; both keys are kept so existing citations still resolve.
@inproceedings{Pereira+Tishby+Lee:93a,
  author = {Fernando Pereira and Naftali Tishby and Lillian Lee},
  title = {Distributional Clustering of {E}nglish Words},
  booktitle = {31st Annual Meeting of the ACL},
  year = {1993},
  pages = {183--190},
}
@inproceedings{Pereira1993,
  author = {Fernando Pereira and Naftali Tishby and Lillian Lee},
  title = {Distributional Clustering of {English} Words},
  booktitle = {Meeting of the Association for Computational Linguistics},
  year = {1993},
  abstract = {We describe and experimentally evaluate a method for automatically clustering words according to their distribution in particular syntactic contexts. Deterministic annealing is used to find lowest distortion sets of clusters. As the annealing parameter increases, existing clusters become unstable and subdivide, yielding a hierarchical "soft" clustering of the data. Clusters are used as the basis for class models of word coocurrence, and the models evaluated with respect to held-out test data},
}
@inproceedings{Perlich2003,
  author = {Claudia Perlich and Foster Provost},
  title = {Aggregation-based feature invention and relational concept classes},
  booktitle = {KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining},
  year = {2003},
  pages = {167--176},
  publisher = {ACM Press},
  address = {New York, NY, USA},
  location = {Washington, D.C.},
  doi = {10.1145/956750.956772},
  isbn = {1-58113-737-0},
}
@article{Perugini2003,
  author = {Saverio Perugini and Naren Ramakrishnan},
  title = {Personalizing Interactions with Information Systems},
  journal = {Advances in Computers},
  editor = {M. Zelkowitz},
  year = {2003},
  volume = {57},
  pages = {323--382},
  abstract = {Personalization constitutes the mechanisms and technologies necessary to customize information access to the end-user. It can be defined as the automatic adjustment of information content, structure, and presentation tailored to the individual. In this chapter, we study personalization from the viewpoint of personalizing interaction. The survey covers mechanisms for information-finding on the web, advanced information retrieval systems, dialog-based applications, and mobile access paradigms. Specific emphasis is placed on studying how users interact with an information system and how the system can encourage and foster interaction. This helps bring out the role of the personalization system as a facilitator which reconciles the user's mental model with the underlying information system's organization. Three tiers of personalization systems are presented, paying careful attention to interaction considerations. These tiers show how progressive levels of sophistication in interaction can be achieved. The chapter also surveys systems support technologies and niche application domains.},
}
@book{Piaget1952,
  author = {Jean Piaget},
  title = {Origins of Intelligence in Children},
  publisher = {International Universities Press},
  year = {1952},
  isbn = {0-8236-3900-2},
}
@inproceedings{Piskorski2000,
  author = {Jakub Piskorski and Gunter Neumann},
  title = {An intelligent text extraction and navigation system},
  booktitle = {the 6th International Conference on Computer-Assisted Information Retrieval (RIAO-2000)},
  address = {Paris, France},
  year = {2000},
  abstract = {We present sppc, a high-performance system for intelligent text extraction and navigation from German free text documents. sppc consists of a set of domainindependent shallow core components which are realized by means of cascaded weighted finite state machines and generic dynamic tries. All extracted information is represented uniformly in one data structure (called the text chart) in a highly compact and linked form in order to support indexing and navigation through the set of...},
}
@article{Pitt1987,
  author = {Leonard Pitt and Robert E. Reinke},
  title = {Criteria for Polynomial Time (Conceptual) Clustering},
  journal = {Machine Learning},
  year = {1987},
  volume = {2},
  number = {4},
  pages = {371--396},
  abstract = {Research in cluster analysis has resulted in a large number of algorithms and similarity measurements for clustering scientific data. Machine learning researchers have published a number of methods for conceptual clustering, in which observations are grouped into clusters which have "good" descriptions in some language. We investigate the general properties which similarity metrics, objective functions, and concept description languages must have to guarantee that a (conceptual) clustering...},
}
% The citation key keeps its historical "1998" so existing citations still resolve;
% Journal of the ACM volume 35 was published in 1988.
@article{Pitt1998,
  author = {Leonard Pitt and Leslie G. Valiant},
  title = {Computational limitations on learning from examples},
  journal = {Journal of the ACM},
  year = {1988},
  volume = {35},
  number = {4},
  pages = {965--984},
  abstract = {The computational complexity of learning Boolean concepts from examples is investigated. It is shown for various classes of concept representations that these cannot be learned feasibly in a distribution-free sense unless R = NP. These classes include (a) disjunctions of two monomials, (b) Boolean threshold functions, and (c) Boolean formulas in which each variable occurs at most once. Relationships between learning of heuristics and finding approximate solutions to NP-hard optimization problems are given.},
}
% Chapter in an edited MIT Press volume, so it is typed as incollection
% (as an article, the publisher and ISBN were silently ignored).
@incollection{Platt1999,
  author = {John C. Platt},
  title = {Fast training of support vector machines using sequential minimal optimization},
  booktitle = {Advances in kernel methods: support vector learning},
  publisher = {MIT Press},
  year = {1999},
  pages = {185--208},
  isbn = {0-262-19416-3},
}
@inproceedings{Plaxton1997,
  author = {C. Greg Plaxton and Rajmohan Rajaraman and Andrea W. Richa},
  title = {Accessing Nearby Copies of Replicated Objects in a Distributed Environment},
  booktitle = {ACM Symposium on Parallel Algorithms and Architectures},
  year = {1997},
  pages = {311--320},
  abstract = {Consider a set of shared objects in a distributed network, where several copies of each object may exist at any given time. To ensure both fast access to the objects as well as efficient utilization of network resources, it is desirable that each access request be satisfied by a copy "close" to the requesting node. Unfortunately, it is not clear how to efficiently achieve this goal in a dynamic, distributed environment in which large numbers of objects are continuously being created,...},
}
@inproceedings{Popescul2000,
  author = {Alexandrin Popescul and Gary Flake and Steve Lawrence and Lyle H. Ungar and C. Lee Giles},
  title = {Clustering and Identifying Temporal Trends in Document Databases},
  booktitle = {IEEE Advances in Digital Libraries (ADL 2000)},
  year = {2000},
  pages = {173--182},
  address = {Washington, DC},
  abstract = {We introduce a simple and efficient method for clustering and identifying temporal trends in hyper-linked document databases. Our method can scale to large datasets because it exploits the underlying regularity often found in hyper-linked document databases. Because of this scalability, we can use our method to study the temporal trends of individual clusters in a statistically meaningful manner. As an example of our approach, we give a summary of the temporal trends found in a scientific...},
}
@article{Pouget2003,
  author = {Alex Pouget and Peter Dayan and Rich Zemel},
  title = {Inference and computation with population codes},
  journal = {Annual Review of Neuroscience},
  volume = {26},
  pages = {381--410},
  year = {2003},
}
@book{Quinlan1993,
  author = {J. Ross Quinlan},
  title = {C4.5: programs for machine learning},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address = {San Francisco, CA, USA},
  year = {1993},
  isbn = {1-55860-238-0},
}
@inproceedings{Ramaswamy2000,
  author = {Sridhar Ramaswamy and Rajeev Rastogi and Kyuseok Shim},
  title = {Efficient Algorithms for Mining Outliers from Large Data Sets},
  booktitle = {ACM SIGMOD International Conference on Management of Data},
  pages = {427--438},
  address = {Dallas, Texas},
  year = {2000},
  abstract = {In this paper, we propose a novel formulation for distance-based outliers that is based on the distance of a point from its k th nearest neighbor. We rank each point on the basis of its distance to its k th nearest neighbor and declare the top n points in this ranking to be outliers. In addition to developing relatively straightforward solutions to finding such outliers based on the classical nestedloop join and index join algorithms, we develop a highly efficient partition-based algorithm...},
}
@techreport{Ratnaparkhi1997,
  author = {Adwait Ratnaparkhi},
  title = {A Simple Introduction to Maximum Entropy Models for Natural Language Processing},
  institution = {Institute for Research in Cognitive Science, University of Pennsylvania},
  number = {97-08},
  year = {1997},
  month = may,
  abstract = {Many problems in natural language processing can be viewed as linguistic classification problems, in which linguistic contexts are used to predict linguistic classes. Maximum entropy models offer a clean way to combine diverse pieces of contextual evidence in order to estimate the probability of a certain linguistic class occurring with a certain linguistic context. This report demonstrates the use of a particular maximum entropy model on an example problem, and then proves some relevant...},
}
@inproceedings{Ratnasamy2001,
  author = {Sylvia Ratnasamy and Paul Francis and Mark Handley and Richard Karp and Scott Shenker},
  title = {A Scalable Content-Addressable Network},
  booktitle = {ACM SIGCOMM 2001},
  year = {2001},
  abstract = {Hash tables -- which map "keys" onto "values" -- are an essential building block in modern software systems. We believe a similar functionality would be equally valuable to large distributed systems. In this paper, we introduce the concept of a Content-Addressable Network (CAN) as a distributed infrastructure that provides hash table-like functionality on Internetlike scales. The CAN design is scalable, fault-tolerant and completely selforganizing, and we demonstrate its scalability, robustness ...},
}
@inproceedings{Ratnasamy2002,
  author = {Sylvia Ratnasamy and Mark Handley and Richard Karp and Scott Shenker},
  title = {Topologically-Aware Overlay Construction and Server Selection},
  booktitle = {IEEE INFOCOM'02},
  year = {2002},
  abstract = {A number of large-scale distributed Internet applications could potentially benefit from some level of knowledge about the relative proximity between its participating host nodes. For example, the performance of large overlay networks could be improved if the application-level connectivity between the nodes in these networks is congruent with the underlying IP-level topology. Similarly, in the case of replicated web content, client nodes could use topological information in selecting one of...},
}
@article{reinhardt1998,
  author = {A Reinhardt and T Hubbard},
  title = {Using neural networks for prediction of the subcellular location of proteins},
  journal = {Nucleic Acids Research},
  year = {1998},
  month = may,
  volume = {26},
  number = {9},
  pages = {2230--2236},
}
@techreport{Rekleitis2004,
  author = {Ioannis Rekleitis},
  title = {A Particle Filter Tutorial for Mobile Robot Localization},
  institution = {Centre for Intelligent Machines, McGill University},
  number = {TR-CIM-04-02},
  address = {Montreal, Quebec, Canada},
  year = {2004},
  month = feb,
}
% Same paper as entry Rennie2003; both keys are kept so existing citations still resolve.
@inproceedings{DBLP:conf/icml/RennieSTK03,
  author = {Jason D. Rennie and Lawrence Shih and Jaime Teevan and David R. Karger},
  title = {Tackling the Poor Assumptions of {Naive} {Bayes} Text Classifiers},
  editor = {Tom Fawcett and Nina Mishra},
  booktitle = {Machine Learning, Proceedings of the Twentieth International Conference (ICML 2003), August 21-24, 2003, Washington, DC, USA},
  pages = {616--623},
  publisher = {AAAI Press},
  year = {2003},
  isbn = {1-57735-189-4},
  bibsource = {DBLP, http://dblp.uni-trier.de},
}
@inproceedings{Rennie2003,
  author = {Jason D. M. Rennie and Lawrence Shih and Jaime Teevan and David R. Karger},
  title = {Tackling the Poor Assumptions of {Naive} {Bayes} Text Classifiers},
  booktitle = {the Twentieth International Conference on Machine Learning},
  year = {2003},
  abstract = {Naive Bayes is often used as a baseline in text classification because it is fast and easy to implement. Its severe assumptions make such efficiency possible but also adversely affect the quality of its results. In this paper we propose simple, heuristic solutions to some of the problems with Naive Bayes classifiers, addressing both systemic issues as well as problems that arise because text is not actually generated according to a multinomial model. We find that our simple corrections result in a fast algorithm that is competitive with state-of-the-art text classification algorithms such as the Support Vector Machine.},
}
@inproceedings{WekaProper,
  author = {P. Reutemann and B. Pfahringer and E. Frank},
  title = {Proper: A Toolbox for Learning from Relational Data with Propositional and Multi-Instance Learners},
  booktitle = {Proceedings of the 17th Australian Joint Conference on Artificial Intelligence (AI2004)},
  publisher = {Springer-Verlag},
  year = {2004},
}
@article{rivest87learning,
  author = {Ronald L. Rivest},
  title = {Learning Decision Lists},
  journal = {Machine Learning},
  year = {1987},
  volume = {2},
  number = {3},
  pages = {229--246},
  url = {http://citeseer.ist.psu.edu/rivest87learning.html},
}
@article{Robnik-Sikonja2003,
  author = {Marko Robnik-{\v S}ikonja and Igor Kononenko},
  title = {Theoretical and Empirical Analysis of {ReliefF} and {RReliefF}},
  journal = {Machine Learning},
  year = {2003},
  volume = {53},
  number = {1--2},
  pages = {23--69},
  publisher = {Kluwer Academic Publishers},
  address = {Hingham, MA, USA},
  issn = {0885-6125},
  abstract = {Relief algorithms are general and successful attribute estimators. They are able to detect conditional dependencies between attributes and provide a unified view on the attribute estimation in regression and classification. In addition, their quality estimates have a natural interpretation. While they have commonly been viewed as feature subset selection methods that are applied in preprocessing step before a model is learned, they have actually been used successfully in a variety of settings, e.g., to select splits or to guide constructive induction in the building phase of decision or regression tree learning, as the attribute weighting method and also in the inductive logic programming. A broad spectrum of successful uses calls for especially careful investigation of various features Relief algorithms have. In this paper we theoretically and empirically investigate and discuss how and why they work, their theoretical and practical properties, their parameters, what kind of dependencies they detect, how do they scale up to large number of examples and features, how to sample data for them, how robust are they regarding the noise, how irrelevant and redundant attributes influence their output and how different metrics influences them.},
}
@inproceedings{DBLP:conf/acsac/RubinJM04,
  author    = {Shai Rubin and Somesh Jha and Barton P. Miller},
  title     = {Automatic Generation and Analysis of NIDS Attacks},
  booktitle = {20th Annual Computer Security Applications Conference (ACSAC 2004), 6-10 December 2004, Tucson, AZ, USA},
  year      = {2004},
  pages     = {28--38},
  publisher = {IEEE Computer Society},
  ee        = {http://doi.ieeecomputersociety.org/10.1109/CSAC.2004.9},
  isbn      = {0-7695-2252-1},
}
@article{Sang2002,
  author    = {Erik F. Tjong Kim Sang},
  title     = {Memory-based shallow parsing},
  journal   = {J. Mach. Learn. Res.},
  year      = {2002},
  volume    = {2},
  pages     = {559--594},
  address   = {Cambridge, MA, USA},
  issn      = {1533-7928},
  publisher = {MIT Press},
}
@inproceedings{Schapire2001,
  author    = {Robert E. Schapire},
  title     = {The Boosting Approach to Machine Learning: An Overview},
  booktitle = {MSRI Workshop on Nonlinear Estimation and Classification},
  year      = {2001},
}
@techreport{Schlimmer1987,
  author      = {Schlimmer, J. S.},
  title       = {Concept Acquisition Through Representational Adjustment},
  institution = {Department of Information and Computer Science, University of California},
  year        = {1987},
  number      = {87-19},
  note        = {Doctoral dissertation},
}
@article{Scholkopf2001,
  author  = {B. Sch{\"o}lkopf and J. Platt and J. Shawe-Taylor and A. J. Smola and R. C. Williamson},
  title   = {Estimating the Support of a High-Dimensional Distribution},
  journal = {Neural Computation},
  year    = {2001},
  volume  = {13},
  number  = {7},
  pages   = {1443--1472},
}
@book{Schweizer1983,
  author    = {B. Schweizer and A. Sklar},
  title     = {Probabilistic Metric Spaces},
  publisher = {Dover Publications},
  year      = {1983},
  isbn      = {0486445143},
}
@article{Sebastiani2002,
  author   = {Fabrizio Sebastiani},
  title    = {Machine learning in automated text categorization},
  journal  = {ACM Computing Surveys (CSUR)},
  year     = {2002},
  volume   = {34},
  number   = {1},
  pages    = {1--47},
  abstract = {The automated categorization (or classification) of texts into predefined categories has witnessed a booming interest in the last 10 years, due to the increased availability of documents in digital form and the ensuing need to organize them. In the research community the dominant approach to this problem is based on machine learning techniques: a general inductive process automatically builds a classifier by learning, from a set of preclassified documents, the characteristics of the categories. The advantages of this approach over the knowledge engineering approach (consisting in the manual definition of a classifier by domain experts) are a very good effectiveness, considerable savings in terms of expert labor power, and straightforward portability to different domains. This survey discusses the main approaches to text categorization that fall within the machine learning paradigm. We will discuss in detail issues pertaining to three different problems, namely, document representation, classifier construction, and classifier evaluation.},
}
@inproceedings{Segal2001,
  author    = {Eran Segal and Daphne Koller and Dirk Ormoneit},
  title     = {Probabilistic Abstraction Hierarchies},
  booktitle = {14th Annual Conference on Neural Information Processing Systems},
  year      = {2001},
  address   = {Vancouver, British Columbia, Canada},
  abstract  = {Many domains are naturally organized in an abstraction hierarchy or taxonomy, where the instances in "nearby" classes in the taxonomy are similar. In this paper, we provide a general probabilistic framework for clustering data into a set of classes organized as a taxonomy, where each class is associated with a probabilistic model from which the data was generated. The clustering algorithm simultaneously optimizes three things: the assignment of data instances to clusters, the models...},
}
@book{Shapiro2001,
  author    = {Linda G. Shapiro and George C. Stockman},
  title     = {Computer Vision},
  publisher = {Prentice Hall},
  year      = {2001},
  owner     = {DK},
  timestamp = {2006.03.07},
}
@article{Shi2005,
  author    = {Zhongmin Shi and Evangelos Milios and Nur Zincir-Heywood},
  title     = {Post-Supervised Template Induction for Information Extraction from Lists and Tables in Dynamic Web Sources},
  journal   = {J. Intell. Inf. Syst.},
  year      = {2005},
  volume    = {25},
  number    = {1},
  pages     = {69--93},
  owner     = {dkkang},
  timestamp = {2006.06.12},
}
@article{Shutske1989,
  author  = {G. M. Shutske and F. A. Pierrat and K. J. Kapples and M. L. Cornfeldt and M. R. Szewczak and F. P. Huger and G. M. Bores and V. Haroutunian and K. L. Davis},
  title   = {9-Amino-1,2,3,4-tetrahydroacridin-1-ols: synthesis and evaluation as potential Alzheimer's disease therapeutics},
  journal = {Journal of Medicinal Chemistry},
  year    = {1989},
  volume  = {32},
  number  = {8},
  pages   = {1805--1813},
}
@article{Sibson1969,
  author  = {Sibson, R.},
  title   = {Information Radius},
  journal = {Z. Wahrs. und verw Geb.},
  year    = {1969},
  volume  = {14},
  pages   = {149--160},
}
@inproceedings{Silvescu2003,
  author    = {Adrian Silvescu and Vasant Honavar},
  title     = {Ontology elicitation: Structural Abstraction = Structuring + Abstraction + Multiple Ontologies},
  booktitle = {Learning@Snowbird Workshop},
  year      = {2003},
  address   = {Snowbird, Utah},
  note      = {Poster},
}
@inproceedings{Singh2002,
  author    = {Munindar P. Singh},
  title     = {The Pragmatic Web: Preliminary Thoughts},
  booktitle = {the NSF-OntoWeb Workshop on Database and Information Systems Research for Semantic Web and Enterprises},
  year      = {2002},
}
@inproceedings{Sintek2001,
  author    = {Michael Sintek and Markus Junker and Ludger van Elst and Andreas Abecker},
  title     = {Using Information Extraction Rules for Extending Domain Ontologies},
  booktitle = {IJCAI-2001 Workshop on Ontology Learning},
  year      = {2001},
  note      = {Position Statement},
}
@inproceedings{Siraj2001,
  author    = {Ambareen Siraj and Susan M. Bridges and Rayford B. Vaughn},
  title     = {Fuzzy Cognitive Maps for Decision Support in an Intelligent Intrusion Detection System},
  booktitle = {IFSA World Congress and 20th North American Fuzzy Information Processing Society (NAFIPS) International Conference},
  year      = {2001},
  address   = {Vancouver, Canada},
}
@inproceedings{Slonim2000,
  author    = {Noam Slonim and Naftali Tishby},
  title     = {Document clustering using word clusters via the information bottleneck method},
  booktitle = {Proceedings of the 23rd annual international ACM SIGIR conference on Research and development in information retrieval},
  year      = {2000},
  pages     = {208--215},
  publisher = {ACM Press},
  doi       = {10.1145/345508.345578},
  isbn      = {1-58113-226-3},
  location  = {Athens, Greece},
}
@inproceedings{Slonim1999,
  author    = {Noam Slonim and Naftali Tishby},
  title     = {Agglomerative Information Bottleneck},
  booktitle = {NIPS},
  year      = {1999},
  pages     = {617--623},
  abstract  = {We introduce a novel distributional clustering algorithm that explicitly maximizes the mutual information per cluster between the data and given categories. This algorithm can be considered as a bottom up hard version of the recently introduced ``Information Bottleneck Method''. We relate the mutual information between clusters and categories to the Bayesian classification error, which provides another motivation for using the obtained clusters as features. The algorithm is compared with the top-down soft version of the information bottleneck method and a relationship between the hard and soft results is established. We demonstrate the algorithm on the 20 Newsgroups data set. For a subset of two news-groups we achieve compression by 3 orders of magnitudes loosing only 10\% of the original mutual information.},
}
@incollection{Smith1990,
  author    = {R. Smith and M. Self and Peter Cheeseman},
  title     = {Estimating uncertain spatial relationships in robotics},
  booktitle = {Autonomous Robot Vehicles},
  editor    = {I. J. Cox and G. T. Wilfong},
  publisher = {Springer-Verlag New York, Inc.},
  address   = {New York, NY, USA},
  year      = {1990},
  pages     = {167--193},
  isbn      = {0-387-97240-4},
}
@inproceedings{Sparks1990,
  author    = {D. L. Sparks and C. Lee and W. H. Rohrer},
  title     = {Population coding of the direction, amplitude, and velocity of saccadic eye movements by neurons in the superior colliculus},
  booktitle = {Cold Spring Harbor Symposia on Quantitative Biology, LV},
  year      = {1990},
  pages     = {805--811},
}
@misc{srinivasan96role,
  author = {A. Srinivasan and R. King and S. Muggleton},
  title  = {The role of background knowledge: using a problem from chemistry to examine the performance of an {ILP} program},
  year   = {1996},
  note   = {Under review for Intelligent Data Analysis in Medicine and Pharmacology. Kluwer Academic Press, 1996.},
  editor = {N. Lavrac and E. Keravnou and B. Zupan},
  url    = {http://citeseer.ist.psu.edu/srinivasan96role.html},
}
@inproceedings{srinivasan96feature,
  author    = {Srinivasan, A. and King, R. D.},
  title     = {Feature construction with Inductive Logic Programming: {A} study of quantitative predictions of biological activity aided by structural attributes},
  booktitle = {Proceedings of the 6th International Workshop on Inductive Logic Programming},
  year      = {1996},
  editor    = {Muggleton, S.},
  pages     = {352--367},
  publisher = {Stockholm University, Royal Institute of Technology},
  url       = {http://citeseer.ist.psu.edu/srinivasan96feature.html},
}
@inproceedings{srinivasan94mutagenesis,
  author    = {Srinivasan, A. and Muggleton, S. and King, R. D. and Sternberg, M. J. E.},
  title     = {Mutagenesis: {ILP} experiments in a non-determinate biological domain},
  booktitle = {Proceedings of the 4th International Workshop on Inductive Logic Programming},
  year      = {1994},
  editor    = {Wrobel, S.},
  volume    = {237},
  pages     = {217--232},
  publisher = {{G}esellschaft f{\"{u}}r {M}athematik und {D}atenverarbeitung {MBH}},
  url       = {http://citeseer.ist.psu.edu/srinivasan94mutagenesis.html},
}
@article{Srinivasan99,
  author    = {Ashwin Srinivasan and Ross D. King},
  title     = {Feature construction with Inductive Logic Programming: A Study of Quantitative Predictions of Biological Activity Aided by Structural Attributes},
  journal   = {Data Min. Knowl. Discov.},
  year      = {1999},
  volume    = {3},
  number    = {1},
  pages     = {37--57},
  address   = {Hingham, MA, USA},
  doi       = {10.1023/A:1009815821645},
  issn      = {1384-5810},
  publisher = {Kluwer Academic Publishers},
}
@inproceedings{Stading2002,
  author    = {Tyron Stading and Petros Maniatis and Mary Baker},
  title     = {Peer-to-Peer Caching Schemes to Address Flash Crowds},
  booktitle = {1st International Peer To Peer Systems Workshop},
  year      = {2002},
  address   = {Cambridge, MA, USA},
}
@article{Stavrou2004,
  author   = {Angelos Stavrou and Dan Rubenstein and Sambit Sahu},
  title    = {A Lightweight, Robust P2P System to Handle Flash Crowds},
  journal  = {IEEE Journal on Selected Areas in Communications (JSAC)},
  year     = {2004},
  volume   = {22},
  number   = {1},
  abstract = {Internet flash crowds (a.k.a. hot spots) are a phenomenon that result from a sudden, unpredicted increase in an on-line object's popularity. Currently, there is no efficient means within the Internet to scalably deliver web objects under hot spot conditions to all clients that desire the object. We present PROOFS: a simple, lightweight, peer-to-peer (P2P) approach that uses randomized overlay construction and randomized, scoped searches to efficiently locate and deliver objects under heavy...},
}
@inproceedings{Steck2002,
  author    = {Harald Steck and Tommi Jaakkola},
  title     = {Unsupervised Active Learning in Large Domains},
  booktitle = {the 18th Annual Conference on Uncertainty in Artificial Intelligence (UAI-02)},
  year      = {2002},
  pages     = {469--476},
  publisher = {Morgan Kaufmann Publishers},
  abstract  = {Active learning is a powerful approach to analyzing data effectively. We show that the feasibility of active learning depends crucially on the choice of measure with respect to which the query is being optimized. The standard information gain, for example, does not permit an accurate evaluation with a small committee, a representative subset of the model space. We propose a surrogate measure requiring only a small committee and discuss the properties of this new measure. We devise, in addition, a bootstrap approach for committee selection. The advantages of this approach are illustrated in the context of recovering (regulatory) network models.},
}
@inproceedings{Stoica2001,
  author    = {Ion Stoica and Robert Morris and David Karger and M. Frans Kaashoek and Hari Balakrishnan},
  title     = {Chord: A Scalable Peer-to-peer Lookup Protocol for Internet Applications},
  booktitle = {the 2001 ACM SIGCOMM Conference},
  year      = {2001},
  pages     = {149--160},
  address   = {San Diego, California},
  abstract  = {A fundamental problem that confronts peer-to-peer applications is the efficient location of the node that stores a desired data item. This paper presents Chord, a distributed lookup protocol that addresses this problem. Chord provides support for just one operation: given a key, it maps the key onto a node. Data location can be easily implemented on top of Chord by associating a key with each data item, and storing the key/data item pair at the node to which the key maps. Chord adapts...},
}
@inproceedings{Stoytchev2005,
  author    = {Alexander Stoytchev},
  title     = {Behavior-Grounded Representation of Tool Affordances},
  booktitle = {Proceedings of IEEE International Conference on Robotics and Automation (ICRA), Barcelona, Spain},
  year      = {2005},
  month     = apr,
  pages     = {805--811},
}
@book{Sutton1992,
  author    = {Richard S. Sutton},
  title     = {Reinforcement Learning},
  publisher = {Kluwer Academic Publishers},
  address   = {Norwell, MA, USA},
  year      = {1992},
  isbn      = {0792392345},
}
@inproceedings{sutton91learning,
  author    = {Richard S. Sutton and Christopher J. Matheus},
  title     = {Learning Polynomial Functions by Feature Construction},
  booktitle = {Machine Learning},
  year      = {1991},
  pages     = {208--212},
  url       = {http://citeseer.ist.psu.edu/sutton91learning.html},
}
@inproceedings{Kymie2002,
  author    = {Kymie M. C. Tan and Roy A. Maxion},
  title     = {``{W}hy 6?'' {D}efining the Operational Limits of Stide, an Anomaly-Based Intrusion Detector},
  booktitle = {Proceedings of the 2002 IEEE Symposium on Security and Privacy},
  year      = {2002},
  pages     = {188},
  publisher = {IEEE Computer Society},
  isbn      = {0-7695-1543-6},
}
@inproceedings{tandon2004,
  author    = {Gaurav Tandon and Philip Chan and Debasis Mitra},
  title     = {MORPHEUS: motif oriented representations to purge hostile events from unlabeled sequences},
  booktitle = {VizSEC/DMSEC '04: Proceedings of the 2004 ACM workshop on Visualization and data mining for computer security},
  year      = {2004},
  pages     = {16--25},
  address   = {New York, NY, USA},
  publisher = {ACM Press},
  doi       = {10.1145/1029208.1029212},
  isbn      = {1-58113-974-8},
  location  = {Washington DC, USA},
}
@inproceedings{tandon2005,
  author    = {Gaurav Tandon and Philip K. Chan},
  title     = {Learning Useful System Call Attributes for Anomaly Detection},
  booktitle = {Proceedings of the Eighteenth International Florida Artificial Intelligence Research Society Conference (FLAIRS-2005)},
  year      = {2005},
  pages     = {405--411},
  address   = {Clearwater Beach, Florida, USA},
}
@inproceedings{tandon2003,
  author    = {Gaurav Tandon and Philip K. Chan},
  title     = {Learning Rules from System Call Arguments and Sequences for Anomaly Detection},
  booktitle = {Proceedings of the 3rd IEEE International Conference on Data Mining (ICDM) Workshop on Data Mining for Computer Security (DMSEC)},
  year      = {2003},
  address   = {Melbourne, Florida, USA},
}
@article{Taneja1995,
  author  = {Taneja, I. J.},
  title   = {New Developments in Generalized Information Measures},
  journal = {Advances in Imaging and Electron Physics},
  year    = {1995},
  volume  = {91},
  pages   = {37--135},
  editor  = {P. W. Hawkes},
}
@article{TangTRP1991,
  author    = {Y. Y. Tang and H. D. Cheng and C. Y. Suen},
  title     = {Transformation-ring-projection (TRP) algorithm and its VLSI implementation},
  journal   = {Int J Pattern Recogn Artif Intell},
  year      = {1991},
  volume    = {5},
  pages     = {25--56},
  owner     = {dkkang},
  timestamp = {2006.03.06},
}
@article{Tax2004,
  author   = {David M. J. Tax and Robert P. W. Duin},
  title    = {Support Vector Data Description},
  journal  = {Machine Learning},
  year     = {2004},
  volume   = {54},
  number   = {1},
  pages    = {45--66},
  abstract = {Data domain description concerns the characterization of a data set. A good description covers all target data but includes no superfluous space. The boundary of a dataset can be used to detect novel data or outliers. We will present the Support Vector Data Description (SVDD) which is inspired by the Support Vector Classifier. It obtains a spherically shaped boundary around a dataset and analogous to the Support Vector Classifier it can be made flexible by using other kernel functions. The method is made robust against outliers in the training set and is capable of tightening the description by using negative examples. We show characteristics of the Support Vector Data Descriptions using artificial and real data.},
  keywords = {outlier detection, novelty detection, one-class classification, support vector classifier, support vector data description},
}
@inproceedings{Taylor1997,
  author    = {Taylor, M. and Stoffel, K. and Hendler, J.},
  title     = {Ontology based Induction of High Level Classification Rules},
  booktitle = {SIGMOD Workshop on Research Issues on Data Mining and Knowledge Discovery},
  year      = {1997},
}
@inproceedings{DBLP:conf/dmkd/TaylorSH97,
  author    = {Merwyn G. Taylor and Kilian Stoffel and James A. Hendler},
  title     = {Ontology-based Induction of High Level Classification Rules},
  booktitle = {DMKD},
  year      = {1997},
  bibsource = {DBLP, http://dblp.uni-trier.de},
}
@article{Tejada2001,
  author   = {Sheila Tejada and Craig A. Knoblock and Steven Minton},
  title    = {Learning object identification rules for information integration},
  journal  = {Information Systems Journal, Special Issue on Data Extraction, Cleaning, and Reconciliation},
  year     = {2001},
  volume   = {26},
  number   = {8},
  pages    = {607--633},
  abstract = {When integrating information from multiple websites, the same data objects can exist in inconsistent text formats across sites, making it difficult to identify matching objects using exact text match. We have developed an object identification system called Active Atlas, which compares the objects' shared attributes in order to identify matching objects. Certain attributes are more important for deciding if a mapping should exist between two objects. Previous methods of object identification have required manual construction of object identification rules or mapping rules for determining the mappings between objects. This manual process is time consuming and error-prone. In our approach, Active Atlas learns to tailor mapping rules, through limited user input, to a specific application domain. The experimental results demonstrate that we achieve higher accuracy and require less user involvement than previous methods across various application domains.},
}
@inproceedings{Thrun02d,
  author    = {S. Thrun},
  title     = {Particle Filters in Robotics},
  booktitle = {Proceedings of the 18th Annual Conference on Uncertainty in AI (UAI)},
  year      = {2002},
}
@article{Thrun2000e,
  author  = {Thrun, S.},
  title   = {Probabilistic Algorithms in Robotics},
  journal = {AI Magazine},
  year    = {2000},
  volume  = {21},
  number  = {4},
  pages   = {93--109},
}
@techreport{Thrun2002,
  author      = {Sebastian Thrun},
  title       = {Robotic Mapping: A Survey},
  institution = {School of Computer Science, Carnegie Mellon University},
  year        = {2002},
  month       = feb,
  number      = {CMU-CS-02-111},
  address     = {Pittsburgh, PA 15213},
  owner       = {DK},
  timestamp   = {2005.12.22},
}
@inproceedings{Shengfeng2004,
  author    = {Shengfeng Tian and Jian Yu and Chuanhuan Yin},
  title     = {Anomaly Detection Using Support Vector Machines},
  booktitle = {International Symposium on Neural Networks (ISNN 2004)},
  year      = {2004},
}
@article{Topsoe2000,
  author    = {Tops{\o}e, Flemming},
  title     = {Some Inequalities for Information Divergence and Related Measures of Discrimination},
  journal   = {IEEE Transactions on Information Theory},
  year      = {2000},
  volume    = {46},
  pages     = {1602--1609},
  issn      = {0018-9448},
  publisher = {IEEE},
}
@inproceedings{Trinder1995,
  author    = {J. Trinder and H. Li},
  title     = {Semi-Automatic Feature Extraction by Snakes},
  booktitle = {Ascona Workshop on Automatic Extraction of Man-Made Objects from Aerial and Space Images},
  year      = {1995},
  pages     = {95--104},
  publisher = {Birkh{\"a}user Verlag},
}
@inproceedings{Tsur1998,
  author    = {Dick Tsur and Jeffrey D. Ullman and Serge Abiteboul and Chris Clifton and Rajeev Motwani and Svetlozar Nestorov and Arnon Rosenthal},
  title     = {Query Flocks: A Generalization of Association-Rule Mining},
  booktitle = {ACM-SIGMOD},
  year      = {1998},
  pages     = {1--12},
}
@inproceedings{Turk1991,
  author    = {M. A. Turk and A. P. Pentland},
  title     = {Face recognition using eigenfaces},
  booktitle = {Proc. IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {1991},
  pages     = {586--591},
  address   = {Maui, Hawaii},
  abstract  = {An approach to the detection and identification of human faces is presented, and a working, near-real-time face recognition system which tracks a subject's head and then recognizes the person by comparing characteristics of the face to those of known individuals is described. This approach treats face recognition as a two-dimensional recognition problem, taking advantage of the fact that faces are normally upright and thus may be described by a small set of 2-D characteristic views. Face images are projected onto a feature space (`face space') that best encodes the variation among known face images. The face space is defined by the `eigenfaces', which are the eigenvectors of the set of faces; they do not necessarily correspond to isolated features such as eyes, ears, and noses. The framework provides the ability to learn to recognize new faces in an unsupervised manner},
  journal   = {Computer Vision and Pattern Recognition},
  owner     = {dkkang},
  timestamp = {2006.07.10},
}
@article{Ukkonen1995,
  author    = {Esko Ukkonen},
  title     = {On-line construction of suffix-trees},
  journal   = {Algorithmica},
  year      = {1995},
  volume    = {14},
  pages     = {249--260},
  owner     = {dkkang},
  timestamp = {2006.05.26},
}
@article{Undercoffer2004,
  author    = {Jeffrey L Undercoffer and Anupam Joshi and Tim Finin and John Pinkston},
  title     = {A Target Centric Ontology for Intrusion Detection: Using {DAML+OIL} to Classify Intrusive Behaviors},
  journal   = {Knowledge Engineering Review},
  year      = {2004},
  month     = jan,
  note      = {Special Issue on Ontologies for Distributed Systems},
  publisher = {Cambridge University Press},
}
@inproceedings{Flavian2006,
  author    = {Flavian Vasile and Adrian Silvescu and Dae-Ki Kang and Vasant Honavar},
  title     = {{TRIPPER}: Rule learning using taxonomies},
  booktitle = {10th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD 2006)},
  year      = {2006},
  month     = apr,
  volume    = {3918},
  series    = {Lecture Notes in Artificial Intelligence},
  address   = {Singapore},
  publisher = {Springer Verlag},
}
@inproceedings{Flavian2005,
  author    = {Flavian Vasile and Adrian Silvescu and Dae-Ki Kang and Vasant Honavar},
  title     = {{TRIPPER}: Rule learning using taxonomies},
  booktitle = {Proceedings of AAAI-05 Workshop on Human Comprehensible Machine Learning},
  year      = {2005},
  address   = {Pittsburgh, Pennsylvania, USA},
}
@article{Visalberghi1994,
  author  = {E. Visalberghi and L. Limongelli},
  title   = {Lack of comprehension of cause-effect relations in tool-using capuchin monkeys (Cebus apella)},
  journal = {J. Comp. Psychol.},
  year    = {1994},
  volume  = {108},
  pages   = {15--22},
}
@article{DeVolder2001,
  author  = {De Volder, A. G. and H. Toyama and Y. Kimura and M. Kiyosawa and H. Nakano and A. Vanlierde and M. C. Wanet-Defalque and M. Mishina and K. Oda and K. Ishiwata and M. Senda},
  title   = {Auditory Triggered Mental Imagery of Shape Involves Visual Association Areas in Early Blind Humans},
  journal = {Neuroimage},
  year    = {2001},
  month   = jul,
  volume  = {14},
  pages   = {129--139},
}
@book{Vygotski1962,
  author    = {Lev S. Vygotsky},
  title     = {Thought and Language},
  publisher = {The MIT Press},
  year      = {1962},
}
@inproceedings{wagner02mimicry,
  author    = {D. Wagner and P. Soto},
  title     = {Mimicry attacks on host based intrusion detection systems},
  booktitle = {Proc. Ninth ACM Conference on Computer and Communications Security},
  year      = {2002},
  text      = {D. Wagner and P. Soto. Mimicry attacks on host based intrusion detection systems. In Proc. Ninth ACM Conference on Computer and Communications Security, 2002.},
  url       = {http://citeseer.ist.psu.edu/wagner02mimicry.html},
}
@inproceedings{Waldvogel1997,
  author    = {Marcel Waldvogel and George Varghese and Jon Turner and Bernhard Plattner},
  title     = {Scalable High Speed IP Routing Lookups},
  booktitle = {SIGCOMM '97},
  year      = {1997},
  abstract  = {Internet address lookup is a challenging problem because of increasing routing table sizes, increased traffic, higher speed links, and the migration to 128 bit IPv6 addresses. IP routing lookup requires computing the best matching prefix, for which standard solutions like hashing were believed to be inapplicable. The best existing solution we know of, BSD radix tries, scales badly as IP moves to 128 bit addresses. Our paper describes a new algorithm for best matching prefix using binary search...},
}
@techreport{WangWrapper2002,
  author      = {Jiying Wang and Frederick Lochovsky},
  title       = {Wrapper Induction based on Nested Pattern Discovery},
  institution = {Dept. of Computer Science, Hong Kong U. of Science \& Technology},
  year        = {2002},
  number      = {HKUST-CS-27-02},
  note        = {submitted for publication},
  owner       = {dkkang},
  timestamp   = {2006.06.12},
}
@inproceedings{Wang2002,
  author    = {Jun Wang and Les Gasser},
  title     = {Mutual online concept learning for multiple agents},
  booktitle = {the first international joint conference on Autonomous agents and multiagent systems},
  year      = {2002},
  pages     = {362--369},
  abstract  = {To create multi-agent systems that are both adaptive and open, agents must collectively learn to generate and adapt their own concepts, ontologies, interpretations, and even languages actively in an online fashion. A central issue is the potential lack of any pre-existing concept to be learned; instead, agents may need to collectively design a concept that is evolving as they exchange information. This paper presents a framework for mutual online concept learning (MOCL) in a shared world. MOCL extends classical online concept learning from single-agent to multi-agent settings. Based on the Perceptron algorithm, we present a specific MOCL algorithm, called the mutual perceptron convergence algorithm, which can converge within a finite number of mistakes under some conditions. Analysis of the convergence conditions shows that the possibility of convergence depends on the quality of the instances they produce. Finally, we point out applications of MOCL and the convergence algorithm to the formation of adaptive ontological and linguistic knowledge such as dynamically generated shared vocabulary and grammar structures.},
}
@inproceedings{Wang2003,
  author    = {Ke Wang and Salvatore J. Stolfo},
  title     = {One Class Training for Masquerade Detection},
  booktitle = {ICDM Workshop on Data Mining for Computer Security (DMSEC 03)},
  year      = {2003},
  address   = {Melbourne, FL},
}
@inproceedings{warrender99detecting,
  author    = {Christina Warrender and Stephanie Forrest and Barak A. Pearlmutter},
  title     = {Detecting Intrusions using System Calls: Alternative Data Models},
  booktitle = {{IEEE} Symposium on Security and Privacy},
  year      = {1999},
  pages     = {133--145},
  location  = {Oakland, CA},
  url       = {http://citeseer.ist.psu.edu/warrender99detecting.html},
}
@incollection{Watson1994Chap8,
  author    = {John S. Watson},
  title     = {Detection of self: The perfect algorithm},
  booktitle = {Self-Awareness in Animals and Humans: Developmental Perspectives},
  publisher = {Cambridge University Press},
  year      = {1994},
}
@inproceedings{Wespi2000,
  author    = {Andreas Wespi and Marc Dacier and Herv{\'e} Debar},
  title     = {Intrusion Detection Using Variable-Length Audit Trail Patterns},
  booktitle = {RAID '00: Proceedings of the Third International Workshop on Recent Advances in Intrusion Detection},
  year      = {2000},
  pages     = {110--129},
  address   = {London, UK},
  publisher = {Springer-Verlag},
  isbn      = {3-540-41085-6},
}
@article{Wong1997,
  author    = {S. K. M. Wong},
  title     = {An Extended Relational Data Model For Probabilistic Reasoning},
  journal   = {J. Intell. Inf. Syst.},
  year      = {1997},
  volume    = {9},
  number    = {2},
  pages     = {181--202},
  address   = {Hingham, MA, USA},
  doi       = {10.1023/A:1008603515938},
  issn      = {0925-9902},
  publisher = {Kluwer Academic Publishers},
}
@inproceedings{feihong2005,
  author    = {Feihong Wu and Jun Zhang and Vasant Honavar},
  title     = {Learning Classifiers Using Hierarchically Structured Class Taxonomies},
  booktitle = {Proceedings of the Symposium on Abstraction, Reformulation, and Approximation (SARA 2005)},
  year      = {2005},
  volume    = {3607},
  pages     = {313--320},
  address   = {Edinburgh},
  publisher = {Springer-Verlag},
  owner     = {dkkang},
  timestamp = {2006.08.01},
}
@inproceedings{Yakhnenko2005,
  author    = {Oksana Yakhnenko and Adrian Silvescu and Vasant Honavar},
  title     = {Discriminatively Trained Markov Model for Sequence Classification},
  booktitle = {IEEE Conference on Data Mining (ICDM 2005)},
  year      = {2005},
  address   = {Houston, Texas},
  owner     = {dkkang},
  timestamp = {2005.11.28},
}
@inproceedings{yamazaki95learning,
  author    = {Takefumi Yamazaki and Michael J. Pazzani and Christopher J. Merz},
  title     = {Learning Hierarchies from Ambiguous Natural Language Data},
  booktitle = {International Conference on Machine Learning},
  year      = {1995},
  pages     = {575--583},
  url       = {http://citeseer.ist.psu.edu/279676.html},
}
@inproceedings{Yan2003,
  author    = {Yan, C. and Dobbs, D. and Honavar, V.},
  title     = {Identification of Surface Residues Involved in Protein-Protein Interaction -- A Support Vector Machine Approach},
  booktitle = {Intelligent Systems Design and Applications (ISDA-03)},
  year      = {2003},
  editor    = {Abraham, A. and Franke, K. and Koppen, M.},
  pages     = {53--62},
  publisher = {Springer-Verlag},
}
@inproceedings{YanDH04,
  author    = {Changhui Yan and Drena Dobbs and Vasant Honavar},
  title     = {A two-stage classifier for identification of protein-protein interface residues},
  booktitle = {Proceedings Twelfth International Conference on Intelligent Systems for Molecular Biology / Third European Conference on Computational Biology (ISMB/ECCB 2004)},
  year      = {2004},
  pages     = {371--378},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://dx.doi.org/10.1093/bioinformatics/bth920},
}
@article{Yang1999,
  author  = {Jihoon Yang and Rajesh Parekh and Vasant Honavar},
  title   = {DistAl: An inter-pattern distance-based constructive learning algorithm},
  journal = {Intell. Data Anal.},
  year    = {1999},
  volume  = {3},
  number  = {1},
  pages   = {55--73},
}
@inproceedings{Yedidia2001,
  author    = {Jonathan S. Yedidia and William T. Freeman and Yair Weiss},
  title     = {Understanding Belief Propagation and Its Generalizations},
  booktitle = {IJCAI 2001},
  year      = {2001},
}
@inproceedings{yin04crossmine,
  author    = {Xiaoxin Yin and Jiawei Han and Jiong Yang and Philip S. Yu},
  title     = {CrossMine: Efficient Classification Across Multiple Database Relations},
  booktitle = {Proceedings of the 20th International Conference on Data Engineering},
  year      = {2004},
  address   = {Boston, MA, USA},
  url       = {http://citeseer.ist.psu.edu/yin04crossmine.html},
}
@article{Zelenko2003,
  author   = {Dmitry Zelenko and Chinatsu Aone and Anthony Richardella},
  title    = {Kernel methods for relation extraction},
  journal  = {The Journal of Machine Learning Research},
  year     = {2003},
  volume   = {3},
  pages    = {1083--1106},
  note     = {Special issue on Machine learning methods for text and images},
  abstract = {We present an application of kernel methods to extracting relations from unstructured natural language sources. We introduce kernels defined over shallow parse representations of text, and design efficient algorithms for computing the kernels. We use the devised kernels in conjunction with Support Vector Machine and Voted Perceptron learning algorithms for the task of extracting person-affiliation and organization-location relations from text. We experimentally evaluate the proposed methods and compare them with feature-based learning algorithms, with promising results.},
}
@article{Zemel1998,
  author  = {Richard S. Zemel and Peter Dayan and Alexandre Pouget},
  title   = {Probabilistic Interpretation of Population Codes},
  journal = {Neural Computation},
  year    = {1998},
  volume  = {10},
  number  = {2},
  pages   = {403--430},
}
% AVT-NBL conference paper (ICDM 2004).
% The acronym is protected as one braced word rather than letter by letter,
% and the proper noun Bayes is protected against sentence-casing styles.
@INPROCEEDINGS{Zhang2004icdm,
  AUTHOR    = {Zhang, Jun and Honavar, Vasant},
  TITLE     = {{AVT-NBL}: An Algorithm for Learning Compact and Accurate Naive {Bayes} Classifiers from Attribute Value Taxonomies and Data},
  BOOKTITLE = {International Conference on Data Mining (ICDM 2004)},
  YEAR      = {2004}
}
% Naive Bayes learning from attribute value taxonomies (ISDA 2004).
% The proper noun Bayes is braced so sentence-casing styles keep its capital.
@INPROCEEDINGS{Zhang2004isda,
  AUTHOR    = {Zhang, Jun and Honavar, Vasant},
  TITLE     = {Learning Naive {Bayes} Classifiers from Attribute Value Taxonomies and Partially Specified Data},
  BOOKTITLE = {International Conference on Intelligent System Design and Applications (ISDA 2004)},
  YEAR      = {2004}
}
% Decision tree learning from attribute value taxonomies (ICML 2003).
% BOOKTITLE is a complete proceedings title instead of a fragment starting with a lowercase article.
% ADDRESS holds the location recorded in the original entry (kept as-is).
@INPROCEEDINGS{Zhang2003,
  AUTHOR    = {Zhang, Jun and Honavar, Vasant},
  TITLE     = {Learning Decision Tree Classifiers from Attribute Value Taxonomies and Partially Specified Data},
  BOOKTITLE = {Proceedings of the Twentieth International Conference on Machine Learning (ICML 2003)},
  YEAR      = {2003},
  ADDRESS   = {Washington, DC}
}
% Journal version of the AVT-based naive Bayes work.
% The citation key says 2004 while YEAR is 2006; the key is kept unchanged
% so existing citations of this key keep resolving.
% NOTE(review): no page range is recorded for this article - add PAGES once confirmed.
@ARTICLE{Zhang2004KIS,
  AUTHOR  = {Zhang, Jun and Kang, Dae-Ki and Silvescu, Adrian and Honavar, Vasant},
  TITLE   = {Learning Accurate and Concise Na{\"i}ve {Bayes} Classifiers from Attribute Value Taxonomies and Data},
  JOURNAL = {Knowledge and Information Systems},
  YEAR    = {2006},
  VOLUME  = {9},
  NUMBER  = {2},
  MONTH   = mar
}
% Ontology-driven decision tree induction (SARA 2002 proceedings).
% Series, volume and publisher were previously embedded in BOOKTITLE; they now
% live in their own fields so styles can format them.
% The abstract is truncated in the source data (it ends with an ellipsis).
@INPROCEEDINGS{Zhang2002,
  AUTHOR    = {Zhang, Jun and Silvescu, Adrian and Honavar, Vasant},
  TITLE     = {Ontology-Driven Induction of Decision Trees at Multiple Levels of Abstraction},
  BOOKTITLE = {Proceedings of Symposium on Abstraction, Reformulation, and Approximation 2002},
  SERIES    = {Lecture Notes in Artificial Intelligence},
  VOLUME    = {2371},
  PUBLISHER = {Springer-Verlag},
  YEAR      = {2002},
  ABSTRACT  = {Most learning algorithms for data-driven induction of pattern classifiers (e.g., the decision tree algorithm), typically represent input patterns at a single level of abstraction -- usually in the form of an ordered tuple of attribute values. However, in many applications of inductive learning -- e.g., scientific discovery, users often need to explore a data set at multiple levels of abstraction, and from different points of view. Each point of view corresponds to a set of ontological (and...}
}
% BIRCH clustering paper (ACM SIGMOD 1996).
% BOOKTITLE is a complete, capitalised proceedings title with protected acronyms.
% ADDRESS holds the location recorded in the original entry (kept as-is).
% Abstract: restored the missing space after two sentence-ending periods and
% replaced straight double quotes with TeX quotes; wording is otherwise untouched.
@INPROCEEDINGS{Zhang1996,
  AUTHOR    = {Zhang, Tian and Ramakrishnan, Raghu and Livny, Miron},
  TITLE     = {{BIRCH}: An Efficient Data Clustering Method for Very Large Databases},
  BOOKTITLE = {Proceedings of the 1996 {ACM} {SIGMOD} International Conference on Management of Data},
  YEAR      = {1996},
  PAGES     = {103--114},
  ADDRESS   = {Montreal, Quebec, Canada},
  ABSTRACT  = {Finding useful patterns in large datasets has attracted considerable interest recently, and one of the most widely studied problems in this area is the identification of clusters, or densely populated regions, in a multi-dimensional dataset. Prior work does not adequately address the problem of large datasets and minimization of I/O costs. This paper presents a data clustering method named BIRCH (Balanced Iterative Reducing and Clustering using Hierarchies), and demonstrates that it is especially suitable for very large databases. BIRCH incrementally and dynamically clusters incoming multi-dimensional metric data points to try to produce the best quality clustering with the available resources (i.e., available memory and time constraints). BIRCH can typically find a good clustering with a single scan of the data, and improve the quality further with a few additional scans. BIRCH is also the first clustering algorithm proposed in the database area to handle ``noise'' (data points that are not part of the underlying pattern) effectively. We evaluate BIRCH's time/space efficiency, data input order sensitivity, and clustering quality through several experiments. We also present a performance comparisons of BIRCH versus CLARANS, a clustering method proposed recently for large datasets, and show that BIRCH is consistently superior.}
}
% Text chunking with a generalised Winnow (journal article).
% JOURNAL holds only the journal name; the special-issue label that used to be
% part of it is carried in NOTE.
@ARTICLE{TongZhang2002,
  AUTHOR  = {Zhang, Tong and Damerau, Fred and Johnson, David},
  TITLE   = {Text Chunking based on a Generalization of {Winnow}},
  JOURNAL = {Journal of Machine Learning Research},
  YEAR    = {2002},
  VOLUME  = {2},
  PAGES   = {615--637},
  NOTE    = {Special Issue on Shallow Parsing}
}
% Tapestry overlay routing technical report.
% MONTH uses the standard month macro; the year is carried by YEAR only.
% The abstract is truncated in the source data (it ends with an ellipsis).
@TECHREPORT{Zhao2001,
  AUTHOR      = {Zhao, Ben Y. and Kubiatowicz, John and Joseph, Anthony D.},
  TITLE       = {{Tapestry}: An Infrastructure for Fault-tolerant Wide-area Location and Routing},
  INSTITUTION = {Computer Science Division, U. C. Berkeley},
  YEAR        = {2001},
  NUMBER      = {CSD-01-1141},
  MONTH       = apr,
  ABSTRACT    = {In today's chaotic network, data and services are mobile and replicated widely for availability, durability, and locality. Components within this infrastructure interact in rich and complex ways, greatly stressing traditional approaches to name service and routing. This paper explores an alternative to traditional approaches called Tapestry. Tapestry is an overlay location and routing infrastructure that provides location-independent routing of messages directly to the closest copy of an...}
}
% Edited volume on aggregation operators.
% Publisher name and publisher city are stored in separate fields.
@BOOK{Calvo2002,
  EDITOR    = {Calvo, Tomasa and Mayor, Gaspar and Mesiar, Radko},
  TITLE     = {Aggregation Operators: New Trends and Applications},
  PUBLISHER = {Physica-Verlag},
  ADDRESS   = {Heidelberg},
  YEAR      = {2002},
  ISBN      = {3790814687}
}
% Monograph on feature selection.
% NOTE(review): the entry previously listed Liu and Motoda as editors with
% year 1999, which disagreed with the 1998 in the citation key. They are
% recorded here as authors with year 1998 - confirm against the title page.
@BOOK{Liu1998,
  AUTHOR    = {Liu, Huan and Motoda, Hiroshi},
  TITLE     = {Feature Selection for Knowledge Discovery and Data Mining},
  PUBLISHER = {Kluwer Academic Publishers},
  YEAR      = {1998},
  ISBN      = {079238198X}
}
% Edited volume on self-awareness; editors are recorded with initials only.
@BOOK{Parker1994,
  EDITOR    = {Parker, S. and Mitchell, R. and Boccia, M.},
  TITLE     = {Self-Awareness in Animals and Humans: Developmental Perspectives},
  PUBLISHER = {Cambridge University Press},
  YEAR      = {1994}
}
% Monograph on abstraction hierarchies in planning.
% ISBN: the stored value had only nine digits; restoring the leading zero gives
% a ten-digit ISBN whose check digit validates.
% NOTE(review): Knoblock is recorded as author rather than editor, since this
% is a single-person monograph - confirm against the title page.
@BOOK{Knoblock1993,
  AUTHOR    = {Knoblock, Craig A.},
  TITLE     = {Generating Abstraction Hierarchies: An automated approach to reducing search in planning},
  PUBLISHER = {Kluwer Academic Publishers},
  YEAR      = {1993},
  ISBN      = {0792393104}
}
This file has been generated by bibtex2html 1.74