Privacy Policy Cookie Policy Terms and Conditions Wikipedia:Helferlein/Chemie-Übersetzungsskript - Wikipedia

Wikipedia:Helferlein/Chemie-Übersetzungsskript

aus Wikipedia, der freien Enzyklopädie

Auf dieser Seite findet sich ein Übersetzungsskript, das die Tabellen zu den chemischen Elementen von Englisch nach Deutsch übersetzen kann, sowie zwei zugehörige Hilfsdateien. Vor dem Start des Skripts müssen die beiden Unterverzeichnisse "en" und "de" angelegt werden. Beim ersten Programmstart wird die gesamte Datenbank durchsucht, was einige Minuten dauern kann. Bei weiteren Starts wird nur noch ein Auszug der Datenbank abgearbeitet, was auf einem schnellen Rechner ca. 10 Sekunden dauert.



[Bearbeiten] Datei "elem-de.txt"

Wasserstoff
Helium
Lithium
Beryllium
Bor
Kohlenstoff
Stickstoff
Sauerstoff
Fluor
Neon
Natrium
Magnesium
Aluminium
Silizium
Phosphor
Schwefel
Chlor
Argon
Kalium
Calcium
Scandium
Titan_(Element)
Vanadium
Chrom
Mangan
Eisen
Kobalt
Nickel
Kupfer
Zink_(Element)
Gallium
Germanium
Arsen
Selen
Brom
Krypton
Rubidium
Strontium
Yttrium
Zirkonium
Niob
Molybdän
Technetium
Ruthenium
Rhodium
Palladium
Silber
Cadmium
Indium
Zinn
Antimon
Tellur
Jod
Xenon
Cäsium
Barium
Hafnium
Tantal
Wolfram
Rhenium
Osmium
Iridium
Platin
Gold
Quecksilber
Thallium
Blei
Wismut
Polonium
Astat
Radon
Francium
Radium
Rutherfordium
Dubnium
Seaborgium
Bohrium
Hassium
Meitnerium
Darmstadtium
Unununium
Ununbium
Ununtrium
Ununquadium
Ununpentium
Ununhexium
Ununseptium
Ununoctium
Lanthan
Cer
Praseodym
Neodym
Promethium
Samarium
Europium
Gadolinium
Terbium
Dysprosium
Holmium
Erbium
Thulium
Ytterbium
Lutetium
Actinium
Thorium
Protactinium
Uran
Neptunium
Plutonium
Americium
Curium
Berkelium
Californium
Einsteinium
Fermium
Mendelevium
Nobelium
Lawrencium

[Bearbeiten] Das Übersetzungsskript

#!/usr/bin/perl

# Translates the English chemical-element tables into another language
# (German by default) using the element name lists and the rule table below.

# Treat all strings as raw bytes rather than characters.
use bytes;

# Path of the English database dump that load() parses when no cache exists.
$cur_table_en="./cur_table_en.sql";

# Translation rules as a flat list of (pattern, replacement) pairs.
#
# The main loop further down splices each pair into a string-eval'd
# "s/PATTERN/REPLACEMENT/ig" statement, in list order. Consequences:
#   * patterns are regular expressions, matched case-insensitively unless
#     they switch that off themselves with (?-i);
#   * replacements are interpolated, so "\$1" refers to the first capture;
#   * order matters: a longer phrase has to come before the shorter words it
#     contains (e.g. "lustrous metallic" before "metallic"), and later rules
#     see the output of earlier ones.
@trans = (
        # --- links, headings and classification -----------------------------
        "aluminum", "Aluminium",
        "\\[\\[Image:([^\\|\\]]*).*?\\]\\]", "[[Bild:\$1]]",
        "scientific notation", "Wissenschaftliche Notation",
        "\\Q[[Periodic table/Standard Table|Full table]]\\E", "[[Periodensystem]]",
        "General", "Allgemein",
        "List of elements by name", "Liste der chemischen Elemente nach dem Namen",
        "List of elements by symbol", "Liste der chemischen Elemente nach Symbol",
        "List of elements by number", "Liste der chemischen Elemente nach der Ordnungszahl",
        "Number", "Ordnungszahl",
        "\\[\\[(periodic table|chemical) series.*?\\]\\]", "[[Serie des Periodensystems|Serie]]",
        "\\Q[[Alkali metal]]\\Es?", "[[Alkalimetalle]]",
        "\\Q[[transition metal]]\\Es?", "[[Übergangsmetalle]]",
        "\\Q[[Noble gas]]\\E(es)?", "[[Edelgase]]",
        "\\Q[[metalloid]]\\Es?", "[[Halbmetalle]]",
        "\\Q[[True metal]]\\Es?", "[[Metalle]]",
        "\\Q[[Actinide]]\\Es?", "[[Actinide]]",
        "\\Q[[Lanthanide]]\\Es?", "[[Lanthanide]]",
        "\\Q[[Halogen]]\\Es?", "[[Halogene]]",
        "\\[\\[alkali(ne)? earth metal\\]\\]s?", "[[Erdalkalimetalle]]",
        "\\[\\[nonmetals?\\]\\]s?", "[[Nichtmetalle]]",
        "periodic table group", "Gruppe des Periodensystems",
        "periodic table period", "Periode des Periodensystems",
        "periodic table block", "Block des Periodensystems",
        "group (\\d+) element", "Gruppe-\$1-Element",
        "Group", "Gruppe",
        "period (\\d+) element", "Periode-\$1-Element",
        "\\bPeriod\\b", "Periode",
        "\\b-block", "-Block",
        "atomic orbital", "Atomorbital",
        "orbital", "Orbital",
        "Density", "Dichte",
        "Mohs hardness scale\\|Hardness", "Mohshärte",
        "kilogram per cubic met(re|er)", "Kilogramm pro Kubikmeter",
        # --- appearance and colours ------------------------------------------
        # Several colour rules carry a negative lookahead so that the word is
        # left alone when directly followed by '=', '"' or a backslash.
        "\\bcolou?r(?!=)\\b", "Farbe",
        "in appearance", "im Aussehen",
        "Appearance", "Aussehen",
        "lustrous metallic", "metallisch glänzend",
        "lust(rous|er|re)", "glänzend",
        "metallic", "metallisch",
        "bluish tinge", "bläulicher Farbton",
        "blue?ish", "bläulich",
        "white(?!\")", "weiß",
        "with (a )?gr(e|a)y(ish)? tinge", "mit einem gräulichen Farbton",
        "silvery tinge", "silbrig",
        "yellowish tinge", "gelblicher Farbton",
        "lemon ", "",
        "silvery", "silbrig",
        "gr(e|a)y steel", "stahlgrau",
        "gr(e|a)yish", "gräulich",
        "gr(e|a)y", "grau",
        "violet-dark", "dunkel-violett",
        "dark", "dunkel",
        "bright", "hell",
        "black(?!\\\\)", "schwarz",
        "red-brown", "rot-braun",
        "reddish", "rötlich",
        "\\bred\\b(?!\\\\)", "rot",
        "pale yellowish green gas", "blasses gelblich-grünes Gas",
        "yellowish green", "gelblich-grün",
        "pale greenish-yellow gas", "blasses grünlich-gelbliches Gas",
        "yellow(?!\\\\)", "gelb",
        "blue(?!\\\\)", "blau",
        "pale", "blass",
        "colorless", "farblos",
        "graphite", "Graphit",
        "diamond", "Diamant",
        # --- atomic properties -----------------------------------------------
        "Atomic Properties", "Atomar",
        "Atomic weight", "Atomgewicht",
        "Atomic mass unit", "Atomare Masseeinheit",
        "Atomic radius", "Atomradius",
        "\\bcalc\\.?(?=\\W)", "berechnet",
        "picomet(re|er)", "Picometer",
        "Covalent radius", "Kovalenter Radius",
        "van der Waals radius", "van der Waals-Radius",
        "no (information|data)", "k.A.",
        "Electron configuration", "Elektronenkonfiguration",
        "\\Q[[electron|e<sup>-</sup>]]\\E", "[[Elektron|e]]<sup>-</sup>",
        "\\Q[[electron|e]]<sup>-</sup>\\E", "[[Elektron|e]]<sup>-</sup>",
        "per \\Q[[energy level]]\\E", "pro [[Energieniveau]]",
        "\\Q[[Oxidation state]]\\Es? \\Q([[Oxide]])\\E",
                "[[Oxidationszustand|Oxidationszustände]] ([[Oxid]])",
        "amphoteric", "amphoter",
        "mildly (\\Q[[acid]]\\Eic|acidic)", "leicht [[Säure|sauer]]",
        "strong (\\Q[[acid]]\\E|acid)", "stark [[Säure|sauer]]",
        "mildly (\\Q[[base|basic]]\\E|basic)", "leicht [[Base|basisch]]",
        "strong (\\Q[[base]]\\E|base)", "stark [[Base|basisch]]",
        "weak (\\Q[[base]]\\E|base)", "schwach [[Base|basisch]]",
        "weak (\\Q[[acid]]\\E|acid)", "schwach [[Säure|sauer]]",
        "forms", "Modifikationen",
        "Crystal structure", "Kristallstruktur",
        "(body centered cubic|Cubic\\,? body( |-)centered)", "kubisch raumzentriert",
        "(face centered cubic|Cubic\\,? face( |-)centered)", "kubisch flächenzentriert",
        "Rhombohedral", "rhomboedrisch",
        "Orthorhombic", "orthorhombisch",
        "Hexagonal", "hexagonal",
        "Tetragonal", "tetragonal",
        "Monoclinic", "monoklin",
        # --- physical properties ---------------------------------------------
        "Physical Properties", "Physikalisch",
        "\\Q[[State of matter]]\\E",  "[[Aggregatzustand]] ([[Magnetismus]])",
        "(a )?solid", "fest",
        "liquid", "flüssig",
        "\\bmagnetism\\b", "Magnetismus",
        "ferromagnetic", "ferromagnetisch",
        "paramagnetic", "paramagnetisch",
        "diamagnetic", "diamagnetisch",
        "non?magnetic", "unmagnetisch",
        "Melting point", "Schmelzpunkt",
        "Boiling point", "Siedepunkt",
        "Molar volume", "Molares Volumen",
        "cubic met(re|er) per mole", "Kubikmeter pro Mol",
        "Heat of vaporization", "Verdampfungswärme",
        "kilojoule per mole", "Kilojoule pro Mol",
        "Heat of fusion", "Schmelzwärme",
        "\\Q[[sublimation|sublimes]]\\E", "[[Sublimation|sublimiert]]",
        "Vapor pressure", "Dampfdruck",
        "Pascal( \\(unit\\))?", "Pascal (Einheit)",
        "(Speed|Velocity) of sound", "Schallgeschwindigkeit",
        "met(re|er) per second", "Meter pro Sekunde",
        # --- miscellaneous properties ----------------------------------------
        "Miscellaneous", "Verschiedenes",
        "Electronegativity", "Elektronegativität",
        "Pauling scale", "Pauling-Skala",
        "Specific heat capacity", "Spezifische Wärmekapazität",
        "joule per (kilo)?gram-kelvin", "Joule pro Kilogramm und Kelvin",
        "Electrical conductivity", "Elektrische Leitfähigkeit",
        "ohm", "Ohm",
        "Thermal conductivity", "Wärmeleitfähigkeit",
        "watt per met(re|er)-kelvin", "Watt pro Meter und Kelvin",
        "1<sup>st</sup> \\Q[[ionization potential]]\\E", "1. [[Ionisierungsenergie]]",
        "2<sup>nd</sup> ionization potential", "2. Ionisierungsenergie",
        "3<sup>rd</sup> ionization potential", "3. Ionisierungsenergie",
        "<sup>th</sup> ionization potential", ". Ionisierungsenergie",
        # --- isotopes and decay ----------------------------------------------
        "\\bIsotope\\b", "Isotop",
        "Most Stable Isotopes", "Stabilste Isotope",
        "natural abundance\\|NA", "Natürliche Häufigkeit|NH",
        "Longest \\[\\[half life", "längste [[Halbwertszeit",
        "half-life", "Halbwertszeit|t<sub>1/2</sub>",
        "decay mode\\|DM", "Zerfallsmodus|ZM",
        "decay energy\\|DE", "Zerfallsenergie|ZE",
        "electron volt", "Elektronenvolt",
        "decay product\\|DP", "Zerfallsprodukt|ZP",
        "stable isotop", "Stabiles Isotop", # "isotope" has already been rewritten to "Isotop" by the rule above
        "stable", "stabil",
        "\\Q[[trace radioisotope|trace]]\\E", "[[radioaktives Spurenelement|in Spuren]]",
        "\\Q[[neutron]]s\\E", "[[Neutron]]en",
        "neutron emission", "Neutronen-Emission",
        "neutron", "Neutron",
        "neutrons", "Neutronen",
        "synthetic radioisotope", "Synthetisches Radioisotop",
        "meta state", "metastabiler Zustand",
        "isomeric transition", "Isomerie-Übergang",
        "\\[e(lectron)? capture", "[Elektronen-Einfang",
        "e(lectron)? capture", "e<sup>-</sup>-Einfang",
        "capture", "Einfang",
        "double beta (minus )?decay", "Doppelter Betazerfall",
        "Double \\β<sup>-</sup>", "β<sup>-</sup>β<sup>-</sup>",
        "beta (emission|(minus )?decay)", "Beta-Strahlung",
        "electron emission", "Elektronen-Emission",
        "alpha (emission|decay)", "Alpha-Strahlung",
        # --- small words, placeholders and units of time ---------------------
        "Known properties", "bekannte Eigenschaften",
        "predicted properties", "vorhergesagte Eigenschaften",
        "a guess", "Schätzung",
        "based( on| upon)?", "basierend auf",
        "\\bis\\b", "ist",
        "\\bat\\b", "bei",
        "a gas", "gasförmig",
        "(?-i)\\bgas\\b", "gasförmig",
        "\\bor\\b", "oder",
        "\\band\\b", "und",
        "\\bbut\\b", "aber",
        "\\bnone\\b", "-",
        "\\biso\\b", "Isotop",
        # (?-i) makes the next two rules case-sensitive.
        "(?-i)NA", "k.A.",
        "(?-i)U/K", "k.A.",
        "with", "mit",
        "\\[\\[[^\\[]*SI\\]\\] units & \\[\\[[^\\[]*STP\\]\\] are used except where noted\\.?",
                "[[SI-Einheitensystem|SI]]-Einheiten und [[Standardbedingungen]] werden benutzt,<br>sofern nicht anders angegeben.",
        "years", "Jahre",
        "year", "Jahr",
        "days", "Tage",
        "day", "Tag",
        "hours", "Stunden",
        "hour", "Stunde",
        "minutes", "Minuten",
        "minute", "Minute",
        "seconds", "Sekunden",
        "second", "Sekunde",
        "cubic", "kubisch",
        "(not(-| )|un)known", "k.A.",
        "not applicable", "k.A.",
        "n/a", "k.A.",
        "data not available", "k.A.",
        "probably", "wahrscheinlich",
        "presumably", "vermutlich",
        "usually", "gewöhnlich",
        "Physical", "Physikalisch",
        "Atomic", "Atomar"
);
# Backslash-escape every '/' in patterns and replacements alike: the pairs are
# later pasted into an s/.../.../ statement that uses '/' as its delimiter.
map { s#/#\\/#g } @trans;

# Parse 16-column row tuples "(...)" out of $file and append them to the
# package-level record list @cur_en, then (re)build the title index %cur_en.
#
# Parameters:
#   $file     - path of the file to read (a database dump or the cache written
#               by save()).
#   $fulltext - true: keep column 4 verbatim; false: keep only the "[[...]]"
#               links found in it (rows whose column 11 equals 1 always keep
#               the full text).
#   $filter   - optional hash ref; when given, only rows whose column 3 is a
#               defined key of it are kept.
#
# Columns 2, 3, 4 and 11 are presumably cur_namespace, cur_title, cur_text and
# cur_is_redirect of a MediaWiki "cur" table - TODO confirm against the dump.
#
# Dies if $file cannot be opened. Returns nothing.
sub load {
        my ($file, $fulltext, $filter) = @_;

        # Build the row pattern from a template:
        #   z = unquoted field, s = single-quoted string (may be empty, and a
        #   closing quote preceded by a backslash does not end it).
        my $rere = "\\(z,z,s,s,s,z,s,s,s,z,z,z,z,z,s,s\\)";
        $rere =~ s/z/(.*?)/g;
        $rere =~ s/s/'(|.*?[^\\\\])'/g;
        my $row_re = qr/$rere/;

        open(my $in, '<', $file) or die "cannot read $file: $!";
        while (my $line = <$in>) {
                ROW: while ($line =~ /$row_re/g) {
                        # Copy the captures at once; later matches overwrite them.
                        my @field = ($1, $2, $3, $4, $5, $6, $7, $8,
                                     $9, $10, $11, $12, $13, $14, $15, $16);

                        # Only rows whose second column is 0 are of interest.
                        next ROW unless $field[1] == 0;
                        next ROW if defined $filter && !defined $filter->{$field[2]};

                        unless ($fulltext || $field[10] == 1) {
                                # Reduce the text to the concatenation of its wiki links.
                                $field[3] = join '', $field[3] =~ /(\[\[.*?\]\])/g;
                        }
                        push @cur_en, \@field;
                }
        }
        close $in;

        # Index every record collected so far by its title column.
        for my $rec (@cur_en) {
                $cur_en{$rec->[2]} = $rec;
        }
        return;
}

# Write records to $file, one "(...)" tuple of 16 fields per line, in the
# same shape that load() parses.
#
# Parameters:
#   $file - path of the file to create or overwrite.
#   $dat  - optional array ref of records (each an array ref of 16 fields).
#           When it is not an array ref, the package-level list @cur_en is
#           written instead, which is what the original always did.
#
# Dies if the file cannot be opened or closed. Returns nothing.
sub save {
        # The original used "= shift", which filled only $file and left $dat undef.
        my ($file, $dat) = @_;
        my $records = ref($dat) eq 'ARRAY' ? $dat : \@cur_en;

        open(my $out, '>', $file) or die "cannot write $file: $!";
        for my $rec (@$records) {
                printf {$out} "(%s,%s,'%s','%s','%s',%s,'%s','%s','%s',%s,%s,%s,%s,%s,'%s','%s')\n", @$rec;
        }
        # Buffered write errors only surface on close.
        close($out) or die "cannot close $file: $!";
        return;
}

# --- Setup: read the element name lists and load the article records --------
# $xx is the target language code; it selects "elem-$xx.txt" and the output
# directory used by the main loop.
$xx = "de";

# Both lists hold one element name per line and are matched up by index.
open(my $en_list, '<', "elem-en.txt") or die "cannot read elem-en.txt: $!";
chomp(@en = <$en_list>);        # chomp, not chop: a last line without a newline keeps its final letter
close $en_list;

open(my $xx_list, '<', "elem-$xx.txt") or die "cannot read elem-$xx.txt: $!";
chomp(@xx = <$xx_list>);
close $xx_list;

# Drop stray trailing blanks and carriage returns so they do not end up in
# file names or replacement text.
s/\s+$// for @en, @xx;

# Only articles whose title is a listed English element name are loaded.
$filter{$_} = 1 for @en;

if(!-r "$cur_table_en") {
        print STDERR "cannot read $cur_table_en\n";
        exit 1;
}
# "en_chemEl" caches the filtered records. Rebuild it when it is missing or
# older than the dump; otherwise read the much smaller cache.
if(!-r "en_chemEl" || (stat "$cur_table_en")[9]>(stat "en_chemEl")[9]) {
        load("$cur_table_en", 1, \%filter);
        save("en_chemEl", \@cur_en);
} else {
        load("en_chemEl", 1);
}

# Convert a temperature from Kelvin to degrees Celsius.
#
# Parameters:
#   $K    - the Kelvin value as written in the text, e.g. "373.15" or "300".
#   $Krem - the fractional part of $K including the dot (e.g. ".15"), or
#           undef when $K has none.
#
# Returns a string with as many decimals as $Krem has digits when $Krem is
# defined; otherwise the number $K - 273, so whole-number input gives
# whole-number output.
sub K2C {       # Kelvin->Celsius
        my ($K, $Krem) = @_;
        if (defined $Krem) {
                # Lexical on purpose: the original stored this in the global $l.
                my $decimals = length($Krem) - 1;
                return sprintf("%.${decimals}f", $K - 273.15);
        }
        return $K - 273;
}

# --- Main pass: translate the property table of every listed element --------
# For each English element name with a loaded article this writes
#   en/<english name>          the untranslated article text,
#   $xx/<translated name>      the translated table as wiki text,
#   $xx/<translated name>.html the same table wrapped in a minimal HTML page.
# The directories "en" and "$xx" must already exist.
open(my $elem_list, '<', "elem-en.txt") or die "cannot read elem-en.txt: $!";
while($elem_en=<$elem_list>) {
        # chomp, not chop: a last line without a newline keeps its final letter.
        chomp $elem_en;
        $elem_en =~ s/\s+$//;
        if(defined $cur_en{$elem_en}) {
                $txt = $cur_en{$elem_en}->[3];
                open(my $raw_out, '>', "en/$elem_en") or die "cannot write en/$elem_en: $!";
                print {$raw_out} "$txt";
                close($raw_out) or die "cannot close en/$elem_en: $!";
                # Reduce the article to its outermost <table>...</table>.
                if($txt =~ s/^.*?(<table.*?>.*<\/table.*?>).*?$/$1/i) {
                        # Reset per element so a name from a previous iteration is never reused.
                        $elem_xx = undef;
                        # Replace every English element name by its translation. Names
                        # with a "_suffix" (e.g. "Titan_(Element)") keep the full form
                        # inside links and the short form in running text.
                        for($i=0; $i<@en; $i++) {
                                $elem_xx= $xx[$i] if $elem_en eq $en[$i];
                                if($xx[$i] =~ /(.*)_.*/) {
                                        $xx_short = $1;
                                        $txt =~ s/\[\[$en[$i](\W)/[[$xx[$i]$1/ig;
                                        $txt =~ s/([^\\[\w])$en[$i](\W)/$1$xx_short$2/ig;
                                } elsif($en[$i] =~ /(.*)_(.*)/) {
                                        $en2 = "$1 $2";
                                        $en_short = $1;
                                        $txt =~ s/(\W)\Q$en[$i]\E(\W)/$1$xx[$i]$2/ig;
                                        $txt =~ s/(\W)\Q$en2\E(\W)/$1$xx[$i]$2/ig;
                                        $txt =~ s/(\W)$en_short(\W)/$1$xx[$i]$2/ig;
                                } else {
                                        $txt =~ s/(\W)$en[$i](\W)/$1$xx[$i]$2/ig;
                                }
                        }
                        unless(defined $elem_xx) {
                                print "no translated name for \"$elem_en\"\n";
                                next;
                        }
                        # Apply the phrase rules in order. String eval is needed because
                        # the replacements contain "$1"; @trans is a fixed in-file
                        # table, never external input.
                        for($i=0; $i<@trans; $i+=2) {
                                eval "\$txt =~ s/$trans[$i]/$trans[$i+1]/ig";
                                warn "translation rule \"$trans[$i]\" failed: $@" if $@;
                        }
                        # Recompute the Celsius value in parentheses after each Kelvin value.
                        $txt =~ s/(\d+(\.\d+)?) ?\Q[[Kelvin|K]]\E \(.*?\)/"$1 [[Kelvin|K]] (".K2C($1,$2)."°[[Celsius|C]])"/ge;
                        $txt =~ s/(\d+(\.\d+)?) ?K \(.*?\)/"$1 K (".K2C($1,$2)."°C)"/ge;
                        $txt =~ s/\[\[[^\[]*\|(\d+(\.\d+)?) ?K\]\] ?\(.*?\)/"$1 K (".K2C($1,$2)."°C)"/ge;
                        # Undo the escaping of quotes and line breaks in the stored text.
                        $txt =~ s/\\'/'/g; $txt =~ s/\\"/"/g; $txt =~ s/\\n/\n/g; $txt =~ s/\\r//g;
                        # Insert a caption right after the opening tag.
                        $txt =~ s/^(<[^>]*>)/$1\n<caption><font size="+1">'''Eigenschaften'''<\/font><\/caption>/s;
                        open(my $wiki_out, '>', "$xx/$elem_xx") or die "cannot write $xx/$elem_xx: $!";
                        print {$wiki_out} "$txt";
                        close($wiki_out) or die "cannot close $xx/$elem_xx: $!";
                        open(my $html_out, '>', "$xx/$elem_xx.html") or die "cannot write $xx/$elem_xx.html: $!";
                        print {$html_out} "<html><body>$txt</body></html>\n";
                        close($html_out) or die "cannot close $xx/$elem_xx.html: $!";
                } else {
                        print "no table in \"$elem_en\"\n";
                }
        } else {
                print "no article \"$elem_en\"\n";
        }
}
close $elem_list;

Static Wikipedia 2008 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -