We thank Loredana Afanasiev, Graduate School of Logic, 
University of Amsterdam;
 Teun Boekhout, 
Eiko Kuramae,
 Vincent Robert, 
Fungal Biodiversity Center, Royal Netherlands Academy of Sciences;
Marc Klein Wolt, 
Thomas Maccarone, 
Astronomical Institute ``Anton Pannekoek'', University of Amsterdam;
Evgeny Verbitskiy, Philips Research; 
Steven de Rooij, 
Ronald de Wolf, CWI;
the referees and the editors,
for suggestions, comments, help with experiments, and data;
Jorma Rissanen and Boris Ryabko for discussions, John Langford
for suggestions, Tzu-Kuo Huang
for pointing out some typos and simplifications, and 
Teemu Roos and Henry Tirri for implementing a visualization
of the clustering process.

D.~Benedetto, E.~Caglioti, and V.~Loreto.
Language trees and zipping, {\em Physical Review Letters},
88:4(2002) 048702.

\bibitem{BCL02b}
Ph.~Ball. 
Algorithm makes tongue tree, {\em Nature}, 22 January,
2002.

\bibitem{BKWMKP00}
T. Belloni, M. Klein-Wolt, M. M\'endez, M. van der Klis, J. van Paradijs,
A model-independent analysis of the variability of GRS 1915+105,
{\em Astronomy and Astrophysics}, 355(2000), 271--290.

\bibitem{BGLVZ98}
C.H.~Bennett, P.~G\'acs, M. Li, P.M.B.~Vit\'anyi, and W.~Zurek.
Information Distance, {\em IEEE Transactions on Information Theory},
44:4(1998), 1407--1423.


\bibitem{BLM03}
C.H. Bennett, M. Li, B. Ma, Chain letters and evolutionary histories,
{\em Scientific American}, June 2003, 76--81.

\bibitem{Br00}
D.~Bryant, V.~Berry, P.~Kearney, M.~Li, T.~Jiang,
T.~Wareham and H.~Zhang. A practical algorithm for
recovering the best supported edges of an evolutionary tree.
{\em Proc. 11th  ACM-SIAM Symposium on Discrete Algorithms}, 
January 9--11, 2000,
San Francisco, California, USA,
287--296, 2000.

\bibitem{Cao1998}
Y. Cao, A. Janke, P. J. Waddell, M. Westerman,
O. Takenaka, S. Murata, N. Okada, S. P\"a\"abo, M. Hasegawa,
Conflict among individual mitochondrial proteins in resolving the
phylogeny of Eutherian orders,
{\em J. Mol. Evol.}, 47(1998), 307--322.

\bibitem{SID}
X. Chen, B. Francia, M. Li, B. McKinnon, A. Seker,
Shared information and program plagiarism detection, 
{\em IEEE Trans. Inform. Th.}, 50:7(2004), 1545--1551.

\bibitem{Ci03}
R. Cilibrasi,
The CompLearn Toolkit, 2003,
 http://complearn.sourceforge.net/.

\bibitem{CVW03}
R. Cilibrasi, P.M.B. Vit\'anyi, R. de Wolf, Algorithmic clustering of music,
{\em Computer Music Journal}, To appear.
http://xxx.lanl.gov/abs/cs.SD/0303025

\bibitem{CPSV00}
G. Cormode, M. Paterson, S. Sahinalp, and U. Vishkin. 
Communication complexity of document exchange. 
In {\em Proc. 11th ACM--SIAM Symp. on Discrete Algorithms}, 2000,
197--206.

                                                                                
\bibitem{CT91}
T.M. Cover and J.A. Thomas.
\newblock {\em Elements of Information Theory}.
\newblock Wiley \& Sons, 1991.
                                                                                


\bibitem{CVfolk}
W.~Chai and B.~Vercoe.
Folk music classification using hidden Markov models.
{\em Proc.~of International Conference on Artificial Intelligence}, 2001.

\bibitem{CF02}
M.~Cooper and J.~Foote.
Automatic music summarization via similarity analysis,
{\em Proc.~IRCAM}, 2002.

\bibitem{DTWml}
R.~Dannenberg, B.~Thom, and D.~Watson. 
A machine learning approach to musical style recognition,
{\em Proc.~International Computer Music Conference}, pp.~344--347, 1997.

\bibitem{DHS}
R.O. Duda, P.E. Hart, D.G. Stork, {\em Pattern Classification},
2nd Edition, Wiley Interscience, 2001. 

\bibitem{GKCwavelet}
M.~Grimaldi, A.~Kokaram, and P.~Cunningham.
Classifying music by genre using the wavelet packet transform
and a round-robin ensemble.
Technical report TCD-CS-2002-64, Trinity College Dublin, 2002.
http://www.cs.tcd.ie/publications/tech-reports/reports.02/TCD-CS-2002-64.pdf

\bibitem{JMWWA02}
A. Janke, O. Magnell, G. Wieczorek, M. Westerman, U. Arnason,
Phylogenetic analysis of 18S rRNA and the mitochondrial
genomes of wombat, Vombatus ursinus, and the spiny
anteater, Tachyglossus aculeatus: increased support for
the Marsupionta hypothesis,
{\em J. Mol. Evol.}, 54:1(2002), 71--80.

\bibitem{Ji01}
T.~Jiang, P.~Kearney, and M.~Li.
A Polynomial Time Approximation Scheme for Inferring Evolutionary Trees from
Quartet Topologies and its Application.
{\em SIAM J. Computing}, 30:6(2001), 1942--1961.

\bibitem{Ke04}
E. Keogh, S. Lonardi, and C.A. Ratanamahatana, Toward parameter-free
data mining, In: {\em Proc. 10th ACM SIGKDD Intn'l Conf. Knowledge
Discovery and Data Mining}, Seattle, Washington, USA, August 22--25, 2004,
206--215.

\bibitem{KBSMJ01}
J.K. Killian, T.R. Buckley, N. Steward, B.L. Munday, R.L. Jirtle,
Marsupials and Eutherians reunited: genetic evidence for the Theria
hypothesis of mammalian evolution, {\em Mammalian Genome}, 12(2001),
513--517.

\bibitem{KAS03}
M. Koppel, S. Argamon, A.R. Shimoni, Automatically categorizing
written texts by author gender, {\em Literary and Linguistic
Computing}, To appear.

\bibitem{KSAG03}
A. Kraskov, H. St\"ogbauer, R.G. Andrzejak, P. Grassberger,
Hierarchical clustering based on mutual information, 2003,
http://arxiv.org/abs/q-bio/0311039

\bibitem{Kr64}
J.B. Kruskal,
Nonmetric multidimensional scaling: a numerical method, {\em Psychometrika},
29(1964), 115--129.


\bibitem{SA03}
T.G. Ksiazek, et~al.,
A Novel Coronavirus Associated with Severe Acute Respiratory Syndrome,
{\em New England J. Medicine}, Published at www.nejm.org April 10, 
2003 (10.1056/NEJMoa030781).

%\bibitem{KH98}
%S. Kumar, B. Hodges, A molecular timescale for vertebrate evolution,
%{\em Nature}, 392(1998), 30 April, 917--919.

                                                                                
\bibitem{KS01}
C.P. Kurtzman,  J. Sugiyama, Ascomycetous yeasts and yeast-like taxa.
In: {\em The mycota VII, Systematics and evolution, part A}, pp.~179--200, 
Springer-Verlag, Berlin, 2001.

                                                                                
\bibitem{Ku03}
C.P. Kurtzman, Phylogenetic circumscription of Saccharomyces,
Kluyveromyces and other members of the Saccharomycetaceae, 
and the proposal of the new genera Lachancea, 
Nakaseomyces, Naumovia, Vanderwaltozyma and
Zygotorulaspora, {\em FEMS Yeast Res.}, 4(2003), 233--245.

\bibitem{La51}
P.S. Laplace, {\em A philosophical essay on probabilities}, 1819.
English translation, Dover, 1951.


\bibitem{LBCKKZ01}
M.~Li, J.H.~Badger, X.~Chen, S.~Kwong, P.~Kearney, and H.~Zhang.
An information-based sequence distance and its application
to whole mitochondrial genome phylogeny,
{\em Bioinformatics}, 17:2(2001), 149--154.

\bibitem{Li01}
M.~Li and P.M.B.~Vit\'anyi.
Algorithmic Complexity,
pp.~376--382 in: {\em International Encyclopedia
of the Social \& Behavioral Sciences},
N.J.~Smelser and P.B.~Baltes, Eds., Pergamon, Oxford, 2001/2002.

\bibitem{Li03}
M. Li, X. Chen, X.~Li, B.~Ma, P.M.B.~Vit\'anyi.
The similarity metric, {\em IEEE Trans. Inform. Th.}, 50:12(2004),
3250--3264.

\bibitem{LiVi97}
M.~Li and P.M.B.~Vit\'anyi.
{\em An Introduction to Kolmogorov Complexity
and its Applications}, Springer-Verlag, New York, 2nd Edition, 1997.

\bibitem{LLB03}
A. Londei, V. Loreto, M.O. Belardinelli,
Music style and authorship categorization by informative compressors,
Proc. 5th Triennial Conference of the European Society for the Cognitive
Sciences of Music (ESCOM), September 8--13, 2003, 
Hannover, Germany, pp.~200--203.

\bibitem{OSBS02}
L.S. Oliveira, R. Sabourin, F. Bortolozzi, C.Y. Suen,
Automatic recognition of handwritten numerical strings: A recognition
and verification strategy, {\em IEEE Trans. Pattern Analysis
and Machine Intelligence}, 24:11(2002), 1438--1454.

                                                                                
\bibitem{rights}
United Nations General Assembly resolution 217 A (III) of 10 December 1948:
Universal Declaration of Human Rights, http://www.un.org/Overview/rights.html

\bibitem{RWKC03}
A. Rokas, B.L. Williams, N. King, S.B. Carroll,
Genome-scale approaches to resolving incongruence in molecular
phylogenies, {\em Nature}, 425(2003), 798--804 (25 October 2003).

\bibitem{Sa97}
D. Salomon, {\em Data Compression}, Springer-Verlag, New York, 1997.

\bibitem{SN87}
N. Saitou, M. Nei, The neighbor-joining method: a new method for
reconstructing phylogenetic trees, {\em Mol. Biol. Evol.}, 4(1987), 406--425.

\bibitem{Sneural}
P.~Scott.
Music classification using neural networks, 2001.\\
http://www.stanford.edu/class/ee373a/musicclassification.pdf


\bibitem{DJT96}
\O. D. Trier, A.K. Jain, T. Taxt, Feature extraction methods for
character recognition---A survey, {\em Pattern Recognition},
29:4(1996), 641--662.

\bibitem{Ya02}
P.N. Yianilos, Normalized forms for two common metrics,
NEC Research Institute, Report 91-082-9027-1, 1991, Revision
7/7/2002. http://www.pnylab.com/pny/


\bibitem{YPYG03}
     A. C.-C. Yang, C.-K. Peng, H.-W. Yien, A.L. Goldberger,
      Information categorization approach to literary authorship disputes,
      {\em Physica A}, 329(2003), 473--483.


\bibitem{TC02}
G.~Tzanetakis and P.~Cook, Music genre classification of audio signals,
{\em IEEE Transactions on Speech and Audio Processing},
10(5):293--302, 2002.


\bibitem{We04}
S. Wehner, Analyzing network traffic and worms using compression,
Manuscript, CWI, 2004. Partially available
at http://homepages.cwi.nl/\textasciitilde{}wehner/worms/





