Wikipedia:Helferlein/Chemie-Übersetzungsskript
aus Wikipedia, der freien Enzyklopädie
Auf dieser Seite findet sich ein Übersetzungsskript, das die Tabellen zu den chemischen Elementen von Englisch nach Deutsch übersetzen kann, sowie zwei zugehörige Hilfsdateien. Vor dem Start des Skripts müssen zwei Unterverzeichnisse "en" und "de" angelegt werden; außerdem muss der Datenbank-Dump "cur_table_en.sql" im Arbeitsverzeichnis liegen. Beim ersten Programmstart wird die gesamte Datenbank durchsucht, was einige Minuten dauern kann. Bei weiteren Starts wird nur noch ein Auszug der Datenbank abgearbeitet, was mit einem schnellen Rechner ca. 10 Sekunden dauert.
[Bearbeiten] Datei "elem-de.txt"
Wasserstoff Helium Lithium Beryllium Bor Kohlenstoff Stickstoff Sauerstoff Fluor Neon Natrium Magnesium Aluminium Silizium Phosphor Schwefel Chlor Argon Kalium Calcium Scandium Titan_(Element) Vanadium Chrom Mangan Eisen Kobalt Nickel Kupfer Zink_(Element) Gallium Germanium Arsen Selen Brom Krypton Rubidium Strontium Yttrium Zirkonium Niob Molybdän Technetium Ruthenium Rhodium Palladium Silber Cadmium Indium Zinn Antimon Tellur Jod Xenon Cäsium Barium Hafnium Tantal Wolfram Rhenium Osmium Iridium Platin Gold Quecksilber Thallium Blei Wismut Polonium Astat Radon Francium Radium Rutherfordium Dubnium Seaborgium Bohrium Hassium Meitnerium Darmstadtium Unununium Ununbium Ununtrium Ununquadium Ununpentium Ununhexium Ununseptium Ununoctium Lanthan Cer Praseodym Neodym Promethium Samarium Europium Gadolinium Terbium Dysprosium Holmium Erbium Thulium Ytterbium Lutetium Actinium Thorium Protactinium Uran Neptunium Plutonium Americium Curium Berkelium Californium Einsteinium Fermium Mendelevium Nobelium Lawrencium
[Bearbeiten] Das Übersetzungsscript
#!/usr/bin/perl
#
# Translates the property tables of the chemical-element articles from
# English to German.
#
# Inputs (all relative to the working directory):
#   ./cur_table_en.sql  SQL dump that is scanned for the element articles
#   elem-en.txt         English article names, one per line
#   elem-de.txt         German article names, one per line, same order
#
# Outputs:
#   en_chemEl           extract of the dump (only the filtered articles);
#                       reused on later runs while it is newer than the dump
#   en/<english name>   original article text
#   de/<german name>    translated table (wiki markup)
#   de/<german name>.html  the same table wrapped in <html><body>
#
# The directories "en" and "de" must exist before the script is started.

use strict;
use warnings;
use bytes;

my $cur_table_en = "./cur_table_en.sql";
my $cache_file   = "en_chemEl";

# Records loaded from the dump: list of array refs (16 fields each) and an
# index of the same records keyed by field 3 (the article title).
my @cur_en;
my %cur_en;

# Phrase table: (pattern, replacement) pairs, applied in this order with /ig.
# Patterns are regex source; "\Q..\E" and "$1" are resolved when the pair is
# spliced into a substitution by translate_phrases(). Order matters: longer
# and more specific phrases come before their shorter parts.
my @trans = (
    "aluminum", "Aluminium",
    "\\[\\[Image:([^\\|\\]]*).*?\\]\\]", "[[Bild:\$1]]",
    "scientific notation", "Wissenschaftliche Notation",
    "\\Q[[Periodic table/Standard Table|Full table]]\\E", "[[Periodensystem]]",
    "General", "Allgemein",
    "List of elements by name", "Liste der chemischen Elemente nach dem Namen",
    "List of elements by symbol", "Liste der chemischen Elemente nach Symbol",
    "List of elements by number", "Liste der chemischen Elemente nach der Ordnungszahl",
    "Number", "Ordnungszahl",
    "\\[\\[(periodic table|chemical) series.*?\\]\\]", "[[Serie des Periodensystems|Serie]]",
    "\\Q[[Alkali metal]]\\Es?", "[[Alkalimetalle]]",
    "\\Q[[transition metal]]\\Es?", "[[Übergangsmetalle]]",
    "\\Q[[Noble gas]]\\E(es)?", "[[Edelgase]]",
    "\\Q[[metalloid]]\\Es?", "[[Halbmetalle]]",
    "\\Q[[True metal]]\\Es?", "[[Metalle]]",
    "\\Q[[Actinide]]\\Es?", "[[Actinide]]",
    "\\Q[[Lanthanide]]\\Es?", "[[Lanthanide]]",
    "\\Q[[Halogen]]\\Es?", "[[Halogene]]",
    "\\[\\[alkali(ne)? earth metal\\]\\]s?", "[[Erdalkalimetalle]]",
    "\\[\\[nonmetals?\\]\\]s?", "[[Nichtmetalle]]",
    "periodic table group", "Gruppe des Periodensystems",
    "periodic table period", "Periode des Periodensystems",
    "periodic table block", "Block des Periodensystems",
    "group (\\d+) element", "Gruppe-\$1-Element",
    "Group", "Gruppe",
    "period (\\d+) element", "Periode-\$1-Element",
    "\\bPeriod\\b", "Periode",
    "\\b-block", "-Block",
    "atomic orbital", "Atomorbital",
    "orbital", "Orbital",
    "Density", "Dichte",
    "Mohs hardness scale\\|Hardness", "Mohshärte",
    "kilogram per cubic met(re|er)", "Kilogramm pro Kubikmeter",
    "\\bcolou?r(?!=)\\b", "Farbe",
    "in appearance", "im Aussehen",
    "Appearance", "Aussehen",
    "lustrous metallic", "metallisch glänzend",
    "lust(rous|er|re)", "glänzend",
    "metallic", "metallisch",
    "bluish tinge", "bläulicher Farbton",
    "blue?ish", "bläulich",
    "white(?!\")", "weiß",
    "with (a )?gr(e|a)y(ish)? tinge", "mit einem gräulichen Farbton",
    "silvery tinge", "silbrig",
    "yellowish tinge", "gelblicher Farbton",
    "lemon ", "",
    "silvery", "silbrig",
    "gr(e|a)y steel", "stahlgrau",
    "gr(e|a)yish", "gräulich",
    "gr(e|a)y", "grau",
    "violet-dark", "dunkel-violett",
    "dark", "dunkel",
    "bright", "hell",
    "black(?!\\\\)", "schwarz",
    "red-brown", "rot-braun",
    "reddish", "rötlich",
    "\\bred\\b(?!\\\\)", "rot",
    "pale yellowish green gas", "blasses gelblich-grünes Gas",
    "yellowish green", "gelblich-grün",
    "pale greenish-yellow gas", "blasses grünlich-gelbliches Gas",
    "yellow(?!\\\\)", "gelb",
    "blue(?!\\\\)", "blau",
    "pale", "blass",
    "colorless", "farblos",
    "graphite", "Graphit",
    "diamond", "Diamant",
    "Atomic Properties", "Atomar",
    "Atomic weight", "Atomgewicht",
    "Atomic mass unit", "Atomare Masseeinheit",
    "Atomic radius", "Atomradius",
    "\\bcalc\\.?(?=\\W)", "berechnet",
    "picomet(re|er)", "Picometer",
    "Covalent radius", "Kovalenter Radius",
    "van der Waals radius", "van der Waals-Radius",
    "no (information|data)", "k.A.",
    "Electron configuration", "Elektronenkonfiguration",
    "\\Q[[electron|e<sup>-</sup>]]\\E", "[[Elektron|e]]<sup>-</sup>",
    "\\Q[[electron|e]]<sup>-</sup>\\E", "[[Elektron|e]]<sup>-</sup>",
    "per \\Q[[energy level]]\\E", "pro [[Energieniveau]]",
    "\\Q[[Oxidation state]]\\Es? \\Q([[Oxide]])\\E", "[[Oxidationszustand|Oxidationszustände]] ([[Oxid]])",
    "amphoteric", "amphoter",
    "mildly (\\Q[[acid]]\\Eic|acidic)", "leicht [[Säure|sauer]]",
    "strong (\\Q[[acid]]\\E|acid)", "stark [[Säure|sauer]]",
    "mildly (\\Q[[base|basic]]\\E|basic)", "leicht [[Base|basisch]]",
    "strong (\\Q[[base]]\\E|base)", "stark [[Base|basisch]]",
    "weak (\\Q[[base]]\\E|base)", "schwach [[Base|basisch]]",
    "weak (\\Q[[acid]]\\E|acid)", "schwach [[Säure|sauer]]",
    "forms", "Modifikationen",
    "Crystal structure", "Kristallstruktur",
    "(body centered cubic|Cubic\\,? body( |-)centered)", "kubisch raumzentriert",
    "(face centered cubic|Cubic\\,? face( |-)centered)", "kubisch flächenzentriert",
    "Rhombohedral", "rhomboedrisch",
    "Orthorhombic", "orthorhombisch",
    "Hexagonal", "hexagonal",
    "Tetragonal", "tetragonal",
    "Monoclinic", "monoklin",
    "Physical Properties", "Physikalisch",
    "\\Q[[State of matter]]\\E", "[[Aggregatzustand]] ([[Magnetismus]])",
    "(a )?solid", "fest",
    "liquid", "flüssig",
    "\\bmagnetism\\b", "Magnetismus",
    "ferromagnetic", "ferromagnetisch",
    "paramagnetic", "paramagnetisch",
    "diamagnetic", "diamagnetisch",
    "non?magnetic", "unmagnetisch",
    "Melting point", "Schmelzpunkt",
    "Boiling point", "Siedepunkt",
    "Molar volume", "Molares Volumen",
    "cubic met(re|er) per mole", "Kubikmeter pro Mol",
    "Heat of vaporization", "Verdampfungswärme",
    "kilojoule per mole", "Kilojoule pro Mol",
    "Heat of fusion", "Schmelzwärme",
    "\\Q[[sublimation|sublimes]]\\E", "[[Sublimation|sublimiert]]",
    "Vapor pressure", "Dampfdruck",
    "Pascal( \\(unit\\))?", "Pascal (Einheit)",
    "(Speed|Velocity) of sound", "Schallgeschwindigkeit",
    "met(re|er) per second", "Meter pro Sekunde",
    "Miscellaneous", "Verschiedenes",
    "Electronegativity", "Elektronegativität",
    "Pauling scale", "Pauling-Skala",
    "Specific heat capacity", "Spezifische Wärmekapazität",
    "joule per (kilo)?gram-kelvin", "Joule pro Kilogramm und Kelvin",
    "Electrical conductivity", "Elektrische Leitfähigkeit",
    "ohm", "Ohm",
    "Thermal conductivity", "Wärmeleitfähigkeit",
    "watt per met(re|er)-kelvin", "Watt pro Meter und Kelvin",
    "1<sup>st</sup> \\Q[[ionization potential]]\\E", "1. [[Ionisierungsenergie]]",
    "2<sup>nd</sup> ionization potential", "2. Ionisierungsenergie",
    "3<sup>rd</sup> ionization potential", "3. Ionisierungsenergie",
    "<sup>th</sup> ionization potential", ". Ionisierungsenergie",
    "\\bIsotope\\b", "Isotop",
    "Most Stable Isotopes", "Stabilste Isotope",
    "natural abundance\\|NA", "Natürliche Häufigkeit|NH",
    "Longest \\[\\[half life", "längste [[Halbwertszeit",
    "half-life", "Halbwertszeit|t<sub>1/2</sub>",
    "decay mode\\|DM", "Zerfallsmodus|ZM",
    "decay energy\\|DE", "Zerfallsenergie|ZE",
    "electron volt", "Elektronenvolt",
    "decay product\\|DP", "Zerfallsprodukt|ZP",
    # "isotope" has already been replaced by "Isotop" further up, hence
    # the pattern without the trailing "e".
    "stable isotop", "Stabiles Isotop",
    "stable", "stabil",
    "\\Q[[trace radioisotope|trace]]\\E", "[[radioaktives Spurenelement|in Spuren]]",
    "\\Q[[neutron]]s\\E", "[[Neutron]]en",
    "neutron emission", "Neutronen-Emission",
    "neutron", "Neutron",
    "neutrons", "Neutronen",
    "synthetic radioisotope", "Synthetisches Radioisotop",
    "meta state", "metastabiler Zustand",
    "isomeric transition", "Isomerie-Übergang",
    "\\[e(lectron)? capture", "[Elektronen-Einfang",
    "e(lectron)? capture", "e<sup>-</sup>-Einfang",
    "capture", "Einfang",
    "double beta (minus )?decay", "Doppelter Betazerfall",
    "Double \\β<sup>-</sup>", "β<sup>-</sup>β<sup>-</sup>",
    "beta (emission|(minus )?decay)", "Beta-Strahlung",
    "electron emission", "Elektronen-Emission",
    "alpha (emission|decay)", "Alpha-Strahlung",
    "Known properties", "bekannte Eigenschaften",
    "predicted properties", "vorhergesagte Eigenschaften",
    "a guess", "Schätzung",
    "based( on| upon)?", "basierend auf",
    "\\bis\\b", "ist",
    "\\bat\\b", "bei",
    "a gas", "gasförmig",
    "(?-i)\\bgas\\b", "gasförmig",
    "\\bor\\b", "oder",
    "\\band\\b", "und",
    "\\bbut\\b", "aber",
    "\\bnone\\b", "-",
    "\\biso\\b", "Isotop",
    "(?-i)NA", "k.A.",
    "(?-i)U/K", "k.A.",
    "with", "mit",
    "\\[\\[[^\\[]*SI\\]\\] units & \\[\\[[^\\[]*STP\\]\\] are used except where noted\\.?", "[[SI-Einheitensystem|SI]]-Einheiten und [[Standardbedingungen]] werden benutzt,<br>sofern nicht anders angegeben.",
    "years", "Jahre",
    "year", "Jahr",
    "days", "Tage",
    "day", "Tag",
    "hours", "Stunden",
    "hour", "Stunde",
    "minutes", "Minuten",
    "minute", "Minute",
    "seconds", "Sekunden",
    "second", "Sekunde",
    "cubic", "kubisch",
    "(not(-| )|un)known", "k.A.",
    "not applicable", "k.A.",
    "n/a", "k.A.",
    "data not available", "k.A.",
    "probably", "wahrscheinlich",
    "presumably", "vermutlich",
    "usually", "gewöhnlich",
    "Physical", "Physikalisch",
    "Atomic", "Atomar",
);

# The pairs are spliced into "s/PATTERN/REPLACEMENT/ig" source text below,
# so every literal slash has to be escaped first.
s{/}{\\/}g for @trans;

# load($file, $fulltext, $filter)
#
# Scans $file for SQL value tuples with 16 fields and appends every tuple
# whose 2nd field is numerically 0 to @cur_en; %cur_en is (re)built as an
# index by the 3rd field.
#   $fulltext  true: keep the 4th field unchanged. False: keep it unchanged
#              only when the 11th field equals 1, otherwise reduce it to the
#              concatenation of its [[...]] links.
#   $filter    optional hash ref; when given, only tuples whose 3rd field is
#              a key of the hash are kept.
# Dies when $file cannot be opened.
sub load {
    my ($file, $fulltext, $filter) = @_;

    # Build the tuple pattern from a template: z = unquoted field,
    # s = single-quoted field whose closing quote is not backslash-escaped.
    my $rere = "\\(z,z,s,s,s,z,s,s,s,z,z,z,z,z,s,s\\)";
    $rere =~ s/z/(.*?)/g;
    $rere =~ s/s/'(|.*?[^\\\\])'/g;
    my $row_re = qr/$rere/;

    open my $in, '<', $file or die "cannot open $file: $!\n";
    while (my $line = <$in>) {
        while ($line =~ /$row_re/g) {
            # Copy the captures right away; the link scan below clobbers them.
            my @fields = ($1, $2, $3, "", $5, $6, $7, $8,
                          $9, $10, $11, $12, $13, $14, $15, $16);
            my $text = $4;

            next unless $fields[1] == 0;
            next if defined $filter && !defined $filter->{ $fields[2] };

            if ($fulltext || $fields[10] == 1) {
                $fields[3] = $text;
            }
            else {
                while ($text =~ /(\[\[.*?\]\])/g) {
                    $fields[3] .= $1;
                }
            }
            push @cur_en, \@fields;
        }
    }
    close $in;

    $cur_en{ $_->[2] } = $_ for @cur_en;
    return;
}

# save($file, $dat)
#
# Writes the records as SQL-style value tuples, one per line, to $file.
# $dat is an array ref of records; when it is not an array ref (older
# callers passed an undefined value) the records in @cur_en are written.
# Dies when $file cannot be written.
sub save {
    my ($file, $dat) = @_;
    my $records = ref($dat) eq 'ARRAY' ? $dat : \@cur_en;

    open my $out, '>', $file or die "cannot write $file: $!\n";
    for my $rec (@$records) {
        printf {$out}
            "(%s,%s,'%s','%s','%s',%s,'%s','%s','%s',%s,%s,%s,%s,%s,'%s','%s')\n",
            @$rec;
    }
    close $out or die "cannot close $file: $!\n";
    return;
}

# K2C($K, $Krem): Kelvin -> Celsius.
#
# $Krem is the fractional part of $K as matched in the text, including the
# leading dot (e.g. ".15"), or undef for an integer value. The result is
# formatted with as many decimals as the input had; integer input is simply
# shifted by 273.
sub K2C {
    my ($K, $Krem) = @_;
    return $K - 273 unless defined $Krem;

    my $decimals = length($Krem) - 1;
    return sprintf("%.${decimals}f", $K - 273.15);
}

# Reads a name list, one entry per line, and returns it without line ends.
sub read_names {
    my ($file) = @_;
    open my $in, '<', $file or die "cannot open $file: $!\n";
    # chomp, not chop: the last line may lack a trailing newline.
    chomp(my @names = <$in>);
    close $in;
    return @names;
}

# Writes $content to $path, replacing any existing file.
sub write_file {
    my ($path, $content) = @_;
    open my $out, '>', $path or die "cannot write $path: $!\n";
    print {$out} $content;
    close $out or die "cannot close $path: $!\n";
    return;
}

# Applies every (pattern, replacement) pair of @trans to $text and returns
# the result.
sub translate_phrases {
    my ($text) = @_;
    for (my $i = 0; $i < @trans; $i += 2) {
        my ($pat, $repl) = @trans[$i, $i + 1];
        # String eval is required here: the table relies on "\Q..\E" and
        # "$1" being interpreted as part of a substitution literal. Only
        # the static table above is evaluated, never external input.
        eval "\$text =~ s/$pat/$repl/ig";
        warn "translation rule \"$pat\" failed: $@" if $@;
    }
    return $text;
}

# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

my $xx = "de";
my @en = read_names("elem-en.txt");
my @xx = read_names("elem-$xx.txt");
warn "elem-en.txt and elem-$xx.txt differ in length; unmatched names are skipped\n"
    if @en != @xx;

my %filter = map { $_ => 1 } @en;

if (!-r $cur_table_en) {
    print "cannot read $cur_table_en";
    exit 1;
}

# Rebuild the extract when it is missing or older than the dump.
if (!-r $cache_file || (stat $cur_table_en)[9] > (stat $cache_file)[9]) {
    load($cur_table_en, 1, \%filter);
    save($cache_file, \@cur_en);
}
else {
    load($cache_file, 1);
}

for my $elem_en (@en) {
    my $rec = $cur_en{$elem_en};
    if (!defined $rec) {
        print "no article \"$elem_en\"\n";
        next;
    }

    my $txt = $rec->[3];
    write_file("en/$elem_en", $txt);

    # Reduce the article to its (outermost) table.
    unless ($txt =~ s/^.*?(<table.*?>.*<\/table.*?>).*?$/$1/i) {
        print "no table in \"$elem_en\"\n";
        next;
    }

    # Replace element names. Names may carry a "_(qualifier)" suffix on
    # either side; the qualifier is kept inside [[links]] and dropped in
    # running text.
    my $elem_xx;
    for my $i (0 .. $#en) {
        my ($en_name, $xx_name) = ($en[$i], $xx[$i]);
        next unless defined $xx_name;
        $elem_xx = $xx_name if $elem_en eq $en_name;

        if ($xx_name =~ /(.*)_.*/) {
            my $xx_short = $1;
            $txt =~ s/\[\[\Q$en_name\E(\W)/[[${xx_name}$1/ig;
            $txt =~ s/([^\\[\w])\Q$en_name\E(\W)/$1${xx_short}$2/ig;
        }
        elsif ($en_name =~ /(.*)_(.*)/) {
            my ($en_short, $en_rest) = ($1, $2);
            my $en2 = "$en_short $en_rest";
            $txt =~ s/(\W)\Q$en_name\E(\W)/$1${xx_name}$2/ig;
            $txt =~ s/(\W)\Q$en2\E(\W)/$1${xx_name}$2/ig;
            $txt =~ s/(\W)\Q$en_short\E(\W)/$1${xx_name}$2/ig;
        }
        else {
            $txt =~ s/(\W)\Q$en_name\E(\W)/$1${xx_name}$2/ig;
        }
    }
    if (!defined $elem_xx) {
        warn "no $xx name for \"$elem_en\"; skipped\n";
        next;
    }

    $txt = translate_phrases($txt);

    # Recompute the Celsius value that follows a Kelvin value in parentheses.
    $txt =~ s/(\d+(\.\d+)?) ?\Q[[Kelvin|K]]\E \(.*?\)/"$1 [[Kelvin|K]] (".K2C($1,$2)."°[[Celsius|C]])"/ge;
    $txt =~ s/(\d+(\.\d+)?) ?K \(.*?\)/"$1 K (".K2C($1,$2)."°C)"/ge;
    $txt =~ s/\[\[[^\[]*\|(\d+(\.\d+)?) ?K\]\] ?\(.*?\)/"$1 K (".K2C($1,$2)."°C)"/ge;

    # Undo the SQL escaping of the dump.
    $txt =~ s/\\'/'/g;
    $txt =~ s/\\"/"/g;
    $txt =~ s/\\n/\n/g;
    $txt =~ s/\\r//g;

    # Insert a caption right after the opening tag.
    $txt =~ s/^(<[^>]*>)/$1\n<caption><font size="+1">'''Eigenschaften'''<\/font><\/caption>/s;

    write_file("$xx/$elem_xx", $txt);
    write_file("$xx/$elem_xx.html", "<html><body>$txt</body></html>\n");
}