Browse Source

cleaned folder structure

bscheibel 4 years ago
parent
commit
a893e8d19e

+ 0 - 5
old/blob_dedection.py

@@ -1,5 +0,0 @@
-import text_to_image
-encoded_image_path = text_to_image.encode_file("/home/bscheibel/PycharmProjects/engineering_drawings_extraction/drawings/5129275_Rev01-GV12.txt", "output_image.png")
-
-import imgkit
-imgkit.from_file('/home/bscheibel/PycharmProjects/engineering_drawings_extraction/drawings/5129275_Rev01-GV12.html', 'out.jpg')

+ 0 - 330
old/blub.csv

@@ -1,330 +0,0 @@
-[187.180925, 815.746851];[213.724746, 815.746851];[187.180925, 833.267817];[213.724746, 833.267817];3
-[217.300913, 816.360079];[244.143154, 816.360079];[217.300913, 835.440719];[244.143154, 835.440719];3
-[248.020901, 815.746851];[326.284694, 815.746851];[248.020901, 833.267817];[326.284694, 833.267817];3
-[1111.191445, 494.016255];[1124.007599, 494.016255];[1111.191445, 521.039792];[1124.007599, 521.039792];1
-[857.632547, 546.096232];[870.4487, 546.096232];[857.632547, 569.279772];[870.4487, 569.279772];1
-[832.072557, 519.30596];[844.888711, 519.30596];[832.072557, 570.839772];[844.888711, 570.839772];1
-[977.620609, 769.211228];[1000.804027, 769.211228];[977.620609, 782.027838];[1000.804027, 782.027838];3
-[591.940763, 812.619667];[677.33106, 812.619667];[591.940763, 831.815352];[677.33106, 831.815352];3
-[967.300113, 797.508989];[1032.901811, 797.508989];[967.300113, 812.147826];[1032.901811, 812.147826];3
-[769.540692, 816.46685];[851.142478, 816.46685];[769.540692, 833.855725];[851.142478, 833.855725];3
-[655.420738, 835.066843];[721.049117, 835.066843];[655.420738, 853.935153];[721.049117, 853.935153];3
-[745.300702, 835.634036];[751.889104, 835.634036];[745.300702, 853.935153];[751.889104, 853.935153];1
-[719.740712, 836.280071];[745.382909, 836.280071];[719.740712, 855.120711];[745.382909, 855.120711];3
-[1563.832702, 193.366806];[1581.806998, 193.366806];[1563.832702, 213.119915];[1581.806998, 213.119915];3
-[1418.610908, 294.223619];[1441.16683, 294.223619];[1418.610908, 310.919876];[1441.16683, 310.919876];3
-[1451.610894, 284.771268];[1474.166716, 284.771268];[1451.610894, 310.319876];[1474.166716, 310.319876];3
-[1532.270923, 352.755985];[1545.087533, 352.755985];[1532.270923, 397.319841];[1545.087533, 397.319841];1
-[1657.540637, 377.651385];[1667.544521, 377.651385];[1657.540637, 390.467994];[1667.544521, 390.467994];3
-[1513.659395, 496.451337];[1577.643206, 496.451337];[1513.659395, 509.267947];[1577.643206, 509.267947];3
-[1295.620462, 613.779747];[1336.738707, 613.779747];[1295.620462, 659.135421];[1336.738707, 659.135421];3
-[1260.580303, 614.619746];[1273.259539, 614.619746];[1260.580303, 633.815431];[1273.259539, 633.815431];1
-[1275.09949, 761.989003];[1314.661698, 761.989003];[1275.09949, 776.74784];[1314.661698, 776.74784];3
-[1409.379436, 651.586916];[1485.023195, 651.586916];[1409.379436, 686.627876];[1485.023195, 686.627876];3
-[1657.900637, 661.091271];[1667.142452, 661.091271];[1657.900637, 673.907881];[1667.142452, 673.907881];3
-[1380.099448, 768.70687];[1450.703209, 768.70687];[1380.099448, 786.347836];[1450.703209, 786.347836];3
-[1658.260637, 802.811214];[1666.726527, 802.811214];[1658.260637, 815.627824];[1666.726527, 815.627824];1
-[978.460609, 826.811205];[1019.882984, 826.811205];[978.460609, 839.627815];[1019.882984, 839.627815];3
-[769.540692, 839.0512];[855.724489, 839.0512];[769.540692, 851.86781];[855.724489, 851.86781];3
-[1657.540637, 519.371328];[1667.544521, 519.371328];[1657.540637, 532.187938];[1667.544521, 532.187938];3
-[1380.099448, 729.106885];[1441.421242, 729.106885];[1380.099448, 764.027845];[1441.421242, 764.027845];3
-[1185.099526, 732.469015];[1227.901758, 732.469015];[1185.099526, 746.267852];[1227.901758, 746.267852];3
-[986.860605, 735.586883];[1013.404455, 735.586883];[986.860605, 753.107849];[1013.404455, 753.107849];3
-[769.540692, 799.331216];[784.148564, 799.331216];[769.540692, 812.147826];[784.148564, 812.147826];3
-[578.860768, 857.411193];[586.56459, 857.411193];[578.860768, 870.227803];[586.56459, 870.227803];1
-[1592.619358, 266.534156];[1598.21495, 266.534156];[1592.619358, 279.347249];[1598.21495, 279.347249];1
-[1186.059526, 704.629026];[1228.861757, 704.629026];[1186.059526, 718.427863];[1228.861757, 718.427863];3
-[947.860621, 708.219709];[961.701948, 708.219709];[947.860621, 727.415394];[961.701948, 727.415394];3
-[775.66069, 748.811236];[784.902505, 748.811236];[775.66069, 761.627846];[784.902505, 761.627846];3
-[475.42081, 787.186862];[567.02468, 787.186862];[475.42081, 804.575736];[567.02468, 804.575736];3
-[472.900811, 807.346854];[532.084584, 807.346854];[472.900811, 842.267814];[532.084584, 842.267814];3
-[1657.900637, 235.931441];[1667.142452, 235.931441];[1657.900637, 248.748051];[1667.142452, 248.748051];3
-[1392.939443, 468.251348];[1402.181258, 468.251348];[1392.939443, 481.067958];[1402.181258, 481.067958];3
-[1296.339481, 507.699789];[1310.180808, 507.699789];[1296.339481, 526.895474];[1310.180808, 526.895474];3
-[1210.311051, 758.495881];[1223.127661, 758.495881];[1210.311051, 799.31968];[1223.127661, 799.31968];1
-[472.900811, 759.946873];[532.084584, 759.946873];[472.900811, 777.335747];[532.084584, 777.335747];3
-[179.020928, 781.066865];[354.702677, 781.066865];[179.020928, 801.360733];[354.702677, 801.360733];3
-[771.100692, 705.946895];[817.564493, 705.946895];[771.100692, 723.587861];[817.564493, 723.587861];3
-[729.820708, 717.851248];[745.204504, 717.851248];[729.820708, 730.667858];[745.204504, 730.667858];3
-[475.42081, 739.786881];[553.104666, 739.786881];[475.42081, 757.175755];[553.104666, 757.175755];3
-[1620.699352, 247.680578];[1631.29553, 247.680578];[1620.699352, 262.080869];[1631.29553, 262.080869];3
-[1435.648734, 401.91759];[1450.527468, 401.91759];[1435.648734, 433.199827];[1450.527468, 433.199827];1
-[1268.859492, 438.819817];[1290.249016, 438.819817];[1268.859492, 487.77549];[1290.249016, 487.77549];1
-[1142.859506, 455.292437];[1185.925189, 455.292437];[1142.859506, 475.42765];[1185.925189, 475.42765];3
-[1111.191445, 613.776202];[1124.007599, 613.776202];[1111.191445, 648.479741];[1124.007599, 648.479741];1
-[729.700705, 700.179712];[743.542031, 700.179712];[729.700705, 719.375397];[743.542031, 719.375397];3
-[637.660745, 717.851248];[653.044541, 717.851248];[637.660745, 730.667858];[653.044541, 730.667858];3
-[698.99261, 733.416162];[711.808764, 733.416162];[698.99261, 774.23969];[711.808764, 774.23969];1
-[223.540911, 733.906883];[347.920337, 733.906883];[223.540911, 754.080751];[347.920337, 754.080751];3
-[1613.979354, 238.909222];[1620.597104, 238.909222];[1613.979354, 251.31131];[1620.597104, 251.31131];1
-[1409.393554, 314.331737];[1432.64683, 314.331737];[1409.393554, 338.399865];[1432.64683, 338.399865];3
-[1130.379548, 379.931384];[1189.683258, 379.931384];[1130.379548, 392.747994];[1189.683258, 392.747994];3
-[804.712568, 433.17628];[817.528721, 433.17628];[804.712568, 460.199816];[817.528721, 460.199816];1
-[752.380677, 581.53152];[786.388708, 581.53152];[752.380677, 603.947852];[786.388708, 603.947852];3
-[484.900806, 707.411253];[518.764594, 707.411253];[484.900806, 720.227863];[518.764594, 720.227863];3
-[1585.179366, 259.574162];[1592.445584, 259.574162];[1585.179366, 272.387255];[1592.445584, 272.387255];1
-[1599.93936, 229.571512];[1611.892307, 229.571512];[1599.93936, 247.188037];[1611.892307, 247.188037];3
-[1628.979348, 194.293396];[1642.179151, 194.293396];[1628.979348, 213.707502];[1642.179151, 213.707502];3
-[734.860706, 864.37119];[775.684505, 864.37119];[734.860706, 877.1878];[775.684505, 877.1878];3
-[1367.631343, 829.176104];[1380.447496, 829.176104];[1367.631343, 869.999652];[1380.447496, 869.999652];1
-[45.581282, 802.811214];[54.047172, 802.811214];[45.581282, 815.627824];[54.047172, 815.627824];1
-[750.820743, 501.133133];[785.536881, 501.133133];[750.820743, 552.587362];[785.536881, 552.587362];3
-[804.689986, 535.717562];[819.328721, 535.717562];[804.689986, 601.319759];[819.328721, 601.319759];1
-[502.672689, 417.816289];[515.488842, 417.816289];[502.672689, 433.199827];[515.488842, 433.199827];3
-[180.820928, 729.491244];[220.924745, 729.491244];[180.820928, 742.307854];[220.924745, 742.307854];3
-[434.980826, 679.211264];[445.804646, 679.211264];[434.980826, 707.747868];[445.804646, 707.747868];1
-[337.540432, 705.21971];[351.381759, 705.21971];[337.540432, 724.415395];[351.381759, 724.415395];3
-[261.820895, 707.411253];[295.684713, 707.411253];[261.820895, 720.227863];[295.684713, 720.227863];3
-[87.701265, 799.691216];[97.705149, 799.691216];[87.701265, 812.507826];[97.705149, 812.507826];3
-[404.740838, 536.171321];[420.124663, 536.171321];[404.740838, 548.987931];[420.124663, 548.987931];3
-[1571.859407, 246.974197];[1582.53141, 246.974197];[1571.859407, 265.067288];[1582.53141, 265.067288];1
-[1419.579432, 354.219851];[1528.12988, 354.219851];[1419.579432, 373.415535];[1528.12988, 373.415535];3
-[1276.17949, 367.059845];[1310.780751, 367.059845];[1276.17949, 386.25553];[1310.780751, 386.25553];3
-[437.500825, 652.331275];[444.428722, 652.331275];[437.500825, 665.147885];[444.428722, 665.147885];1
-[162.820935, 662.531271];[203.644764, 662.531271];[162.820935, 675.34788];[203.644764, 675.34788];3
-[138.580945, 776.771225];[178.684762, 776.771225];[138.580945, 789.587835];[178.684762, 789.587835];3
-[1130.379548, 357.347034];[1226.663298, 357.347034];[1130.379548, 374.735908];[1226.663298, 374.735908];3
-[947.860621, 369.099845];[961.701948, 369.099845];[947.860621, 388.295529];[961.701948, 388.295529];3
-[726.940709, 370.779844];[740.782036, 370.779844];[726.940709, 389.975529];[740.782036, 389.975529];3
-[671.992621, 738.57614];[684.808775, 738.57614];[671.992621, 779.399688];[684.808775, 779.399688];1
-[76.781269, 671.506908];[146.664829, 671.506908];[76.781269, 706.547868];[146.664829, 706.547868];3
-[355.420858, 536.171321];[370.804683, 536.171321];[355.420858, 548.987931];[370.804683, 548.987931];3
-[233.621312, 585.517542];[252.581952, 585.517542];[233.621312, 642.479743];[252.581952, 642.479743];1
-[158.632472, 521.975989];[171.449082, 521.975989];[158.632472, 537.359785];[171.449082, 537.359785];3
-[158.632472, 599.855958];[171.449082, 599.855958];[158.632472, 615.359754];[171.449082, 615.359754];3
-[77.021169, 649.186917];[146.26276, 649.186917];[77.021169, 666.827884];[146.26276, 666.827884];3
-[45.221282, 661.091271];[54.463097, 661.091271];[45.221282, 673.907881];[54.463097, 673.907881];3
-[312.700875, 371.739844];[328.845631, 371.739844];[312.700875, 390.935528];[328.845631, 390.935528];3
-[237.34131, 497.077577];[256.181951, 497.077577];[237.34131, 546.359781];[256.181951, 546.359781];1
-[82.421267, 506.411333];[123.245081, 506.411333];[82.421267, 519.227943];[123.245081, 519.227943];3
-[44.741282, 519.371328];[54.745166, 519.371328];[44.741282, 532.187938];[54.745166, 532.187938];3
-[490.180804, 362.867032];[549.244577, 362.867032];[490.180804, 380.255906];[549.244577, 380.255906];3
-[181.901332, 442.477574];[200.861973, 442.477574];[181.901332, 483.599807];[200.861973, 483.599807];1
-[73.061271, 454.306995];[142.182762, 454.306995];[73.061271, 489.347955];[142.182762, 489.347955];3
-[158.610245, 432.877574];[173.249081, 432.877574];[158.610245, 472.439811];[173.249081, 472.439811];1
-[73.301171, 431.987004];[142.182762, 431.987004];[73.301171, 449.627971];[142.182762, 449.627971];3
-[209.500916, 386.291381];[250.324745, 386.291381];[209.500916, 399.107991];[250.324745, 399.107991];3
-[1591.299363, 217.787433];[1599.876461, 217.787433];[1591.299363, 235.1759];[1599.876461, 235.1759];1
-[1130.379548, 340.2114];[1144.987478, 340.2114];[1130.379548, 353.028009];[1144.987478, 353.028009];3
-[581.272303, 536.616011];[594.088913, 536.616011];[581.272303, 589.799764];[594.088913, 589.799764];1
-[73.78127, 385.787023];[157.422756, 385.787023];[73.78127, 420.707982];[157.422756, 420.707982];3
-[677.860729, 333.227044];[704.88452, 333.227044];[677.860729, 350.74801];[704.88452, 350.74801];3
-[254.152434, 391.056019];[266.969044, 391.056019];[254.152434, 398.75984];[266.969044, 398.75984];3
-[44.741282, 377.651385];[54.745166, 377.651385];[44.741282, 390.467994];[54.745166, 390.467994];3
-[1615.059375, 182.053432];[1625.999058, 182.053432];[1615.059375, 199.907543];[1625.999058, 199.907543];1
-[1374.086627, 520.331387];[1394.260495, 520.331387];[1374.086627, 616.919753];[1394.260495, 616.919753];1
-[73.78127, 368.171388];[88.389164, 368.171388];[73.78127, 380.987998];[88.389164, 380.987998];3
-[1257.4595, 290.041339];[1261.277371, 290.041339];[1257.4595, 299.160632];[1261.277371, 299.160632];1
-[1369.791342, 637.89618];[1382.607495, 637.89618];[1369.791342, 678.719729];[1382.607495, 678.719729];1
-[492.700803, 342.70704];[569.622591, 342.70704];[492.700803, 360.095914];[569.622591, 360.095914];3
-[674.38073, 302.627056];[708.484496, 302.627056];[674.38073, 320.148023];[708.484496, 320.148023];3
-[1238.499505, 291.361338];[1257.366341, 291.361338];[1238.499505, 303.871018];[1257.366341, 303.871018];3
-[1340.379464, 273.707068];[1404.101257, 273.707068];[1340.379464, 291.348034];[1404.101257, 291.348034];3
-[1193.391412, 508.156254];[1206.207566, 508.156254];[1193.391412, 556.679777];[1206.207566, 556.679777];1
-[168.340933, 336.827042];[292.720359, 336.827042];[168.340933, 357.00091];[292.720359, 357.00091];3
-[1216.779513, 295.213001];[1237.144475, 295.213001];[1216.779513, 311.987569];[1237.144475, 311.987569];3
-[1193.391412, 582.436225];[1206.207566, 582.436225];[1193.391412, 630.839748];[1206.207566, 630.839748];1
-[489.700804, 299.267057];[583.704673, 299.267057];[489.700804, 336.695923];[583.704673, 336.695923];3
-[228.820908, 293.880288];[240.663153, 293.880288];[228.820908, 303.120932];[240.663153, 303.120932];3
-[550.31267, 418.896286];[563.128823, 418.896286];[550.31267, 464.279781];[563.128823, 464.279781];1
-[128.260949, 332.411403];[168.244766, 332.411403];[128.260949, 345.228013];[168.244766, 345.228013];3
-[170.620932, 293.26706];[296.560358, 293.26706];[170.620932, 312.840928];[296.560358, 312.840928];3
-[552.952669, 551.616227];[565.768822, 551.616227];[552.952669, 604.679734];[565.768822, 604.679734];1
-[79.901168, 311.771411];[89.142983, 311.771411];[79.901168, 324.588021];[89.142983, 324.588021];3
-[1433.271895, 131.897561];[1452.567304, 131.897561];[1433.271895, 150.83994];[1452.567304, 150.83994];3
-[1340.379464, 253.787075];[1404.101257, 253.787075];[1340.379464, 271.548042];[1404.101257, 271.548042];3
-[502.672689, 532.896242];[534.328937, 532.896242];[502.672689, 595.319762];[534.328937, 595.319762];1
-[127.060949, 288.85142];[167.044766, 288.85142];[127.060949, 301.66803];[167.044766, 301.66803];3
-[1574.73937, 71.054519];[1591.878028, 71.054519];[1574.73937, 92.747251];[1591.878028, 92.747251];3
-[1657.900637, 94.211498];[1667.142452, 94.211498];[1657.900637, 107.028108];[1667.142452, 107.028108];3
-[1447.781876, 255.401002];[1460.619875, 255.401002];[1447.781876, 267.599893];[1460.619875, 267.599893];3
-[745.313263, 223.799864];[760.046536, 223.799864];[745.313263, 233.709778];[760.046536, 233.709778];3
-[1301.01948, 195.707099];[1370.903339, 195.707099];[1301.01948, 230.628058];[1370.903339, 230.628058];3
-[372.700851, 266.027071];[406.924675, 266.027071];[372.700851, 283.548037];[406.924675, 283.548037];3
-[1559.619391, 60.494536];[1571.652708, 60.494536];[1559.619391, 77.627266];[1571.652708, 77.627266];3
-[1411.299435, 92.531499];[1421.303319, 92.531499];[1411.299435, 105.348108];[1421.303319, 105.348108];3
-[1332.339467, 144.947119];[1386.483301, 144.947119];[1332.339467, 162.588086];[1386.483301, 162.588086];3
-[1176.09953, 169.539924];[1271.929969, 169.539924];[1176.09953, 188.735609];[1271.929969, 188.735609];3
-[372.580851, 242.499895];[407.182201, 242.499895];[372.580851, 261.69558];[407.182201, 261.69558];3
-[1457.379417, 40.299976];[1543.96988, 40.299976];[1457.379417, 89.255649];[1543.96988, 89.255649];3
-[1431.833544, 267.658168];[1452.850286, 267.658168];[1431.833544, 294.239882];[1452.850286, 294.239882];3
-[840.340664, 152.991573];[847.160648, 152.991573];[840.340664, 165.390912];[847.160648, 165.390912];1
-[730.673304, 198.479921];[754.286545, 198.479921];[730.673304, 223.749789];[754.286545, 223.749789];3
-[647.740741, 183.587104];[698.342945, 183.587104];[647.740741, 201.10807];[698.342945, 201.10807];3
-[847.060661, 144.481675];[858.961488, 144.481675];[847.060661, 160.320606];[858.961488, 160.320606];3
-[1251.579502, 89.162099];[1260.241052, 89.162099];[1251.579502, 100.440512];[1260.241052, 100.440512];3
-[1613.740655, 16.211529];[1629.124509, 16.211529];[1613.740655, 29.028139];[1629.124509, 29.028139];3
-[1393.52662, 184.50045];[1413.700488, 184.50045];[1393.52662, 250.0799];[1413.700488, 250.0799];1
-[823.060671, 159.013418];[839.8344, 159.013418];[823.060671, 181.187519];[839.8344, 181.187519];3
-[1238.619505, 80.271514];[1251.573457, 80.271514];[1238.619505, 96.120516];[1251.573457, 96.120516];3
-[1388.51098, 68.616142];[1401.32759, 68.616142];[1388.51098, 109.439956];[1401.32759, 109.439956];1
-[1217.139513, 73.093945];[1236.388601, 73.093945];[1217.139513, 92.027408];[1236.388601, 92.027408];3
-[1254.459903, 146.79769];[1273.420544, 146.79769];[1254.459903, 201.719919];[1273.420544, 201.719919];1
-[662.500742, 156.493919];[666.896688, 156.493919];[662.500742, 169.30739];[666.896688, 169.30739];1
-[1472.020711, 16.211529];[1487.404566, 16.211529];[1472.020711, 29.028139];[1487.404566, 29.028139];3
-[1175.25953, 51.819971];[1272.169911, 51.819971];[1175.25953, 71.015656];[1272.169911, 71.015656];3
-[965.154963, 118.015049];[989.607477, 118.015049];[965.154963, 151.079919];[989.607477, 151.079919];3
-[566.141179, 136.597692];[575.261823, 136.597692];[566.141179, 150.23994];[575.261823, 150.23994];3
-[1330.300768, 16.211529];[1345.684622, 16.211529];[1330.300768, 29.028139];[1345.684622, 29.028139];3
-[820.420672, 81.960372];[875.102857, 81.960372];[820.420672, 100.921013];[875.102857, 100.921013];3
-[536.873313, 215.759904];[551.606776, 215.759904];[536.873313, 225.783307];[551.606776, 225.783307];3
-[45.221282, 235.931441];[54.463097, 235.931441];[45.221282, 248.748051];[54.463097, 248.748051];3
-[1192.420823, 16.211529];[1200.124645, 16.211529];[1192.420823, 29.028139];[1200.124645, 29.028139];1
-[1180.299933, 195.87767];[1199.260573, 195.87767];[1180.299933, 250.6799];[1199.260573, 250.6799];1
-[238.183122, 152.933404];[250.061432, 152.933404];[238.183122, 162.479935];[250.061432, 162.479935];3
-[650.26074, 147.133914];[662.491106, 147.133914];[650.26074, 164.627387];[662.491106, 164.627387];3
-[1050.70088, 16.211529];[1058.404701, 16.211529];[1050.70088, 29.028139];[1058.404701, 29.028139];1
-[761.980695, 59.139969];[858.891018, 59.139969];[761.980695, 78.335653];[858.891018, 78.335653];3
-[606.820757, 68.739965];[697.971028, 68.739965];[606.820757, 87.93565];[697.971028, 87.93565];3
-[522.233344, 190.439924];[545.846784, 190.439924];[522.233344, 215.703307];[545.846784, 215.703307];3
-[115.540954, 194.747099];[166.143187, 194.747099];[115.540954, 212.268066];[166.143187, 212.268066];3
-[908.980936, 16.211529];[916.684758, 16.211529];[908.980936, 29.028139];[916.684758, 29.028139];1
-[701.872255, 100.776133];[714.688865, 100.776133];[701.872255, 119.999952];[714.688865, 119.999952];3
-[293.275079, 106.188465];[312.101354, 106.188465];[293.275079, 142.079943];[312.101354, 142.079943];1
-[243.58312, 149.465167];[252.701429, 149.465167];[243.58312, 152.879935];[252.701429, 152.879935];3
-[439.660824, 137.173567];[455.330763, 137.173567];[439.660824, 160.307476];[455.330763, 160.307476];3
-[355.900858, 150.240345];[410.703102, 150.240345];[355.900858, 169.200985];[410.703102, 169.200985];3
-[220.795036, 162.385232];[242.891778, 162.385232];[220.795036, 190.199924];[242.891778, 190.199924];3
-[138.100945, 165.973981];[158.335568, 165.973981];[138.100945, 182.747352];[158.335568, 182.747352];3
-[767.380993, 16.211529];[775.084815, 16.211529];[767.380993, 29.028139];[775.084815, 29.028139];1
-[1612.431742, 158.879936];[1626.565951, 158.879936];[1612.431742, 179.640138];[1626.565951, 179.640138];3
-[62.610583, 140.197691];[76.40932, 140.197691];[62.610583, 183.599927];[76.40932, 183.599927];1
-[45.101282, 94.211498];[54.343097, 94.211498];[45.101282, 107.028108];[54.343097, 107.028108];3
-[404.740838, 74.619962];[490.251222, 74.619962];[404.740838, 93.815647];[490.251222, 93.815647];3
-[304.423275, 97.113139];[314.981355, 97.113139];[304.423275, 106.199961];[314.981355, 106.199961];3
-[200.741325, 86.43772];[209.861969, 86.43772];[200.741325, 100.19996];[209.861969, 100.19996];1
-[201.341325, 86.43772];[219.581965, 86.43772];[201.341325, 116.399953];[219.581965, 116.399953];1
-[74.753731, 72.479925];[89.487002, 72.479925];[74.753731, 82.509835];[89.487002, 82.509835];3
-[279.580888, 66.339966];[377.811261, 66.339966];[279.580888, 85.53565];[377.811261, 85.53565];3
-[566.741178, 136.597692];[584.981819, 136.597692];[566.741178, 166.559933];[584.981819, 166.559933];1
-[60.113772, 47.159981];[83.727013, 47.159981];[60.113772, 72.429849];[83.727013, 72.429849];3
-[136.060946, 59.259968];[220.371271, 59.259968];[136.060946, 78.455653];[220.371271, 78.455653];3
-[625.54105, 16.091529];[633.244872, 16.091529];[625.54105, 28.908139];[633.244872, 28.908139];1
-[483.941106, 16.211529];[491.644928, 16.211529];[483.941106, 29.028139];[491.644928, 29.028139];1
-[567.343079, 61.628617];[579.221279, 61.628617];[567.343079, 69.71997];[579.221279, 69.71997];3
-[342.101163, 16.211529];[349.804985, 16.211529];[342.101163, 29.028139];[349.804985, 29.028139];1
-[547.194758, 69.778187];[573.581277, 69.778187];[547.194758, 102.359959];[573.581277, 102.359959];3
-[200.50122, 16.211529];[208.205042, 16.211529];[200.50122, 29.028139];[208.205042, 29.028139];1
-[498.832336, 118.536128];[511.648946, 118.536128];[498.832336, 126.23995];[511.648946, 126.23995];3
-[58.661277, 16.211529];[66.365098, 16.211529];[58.661277, 29.028139];[66.365098, 29.028139];1
-[986.500605, 865.811189];[1013.52445, 865.811189];[986.500605, 878.627799];[1013.52445, 878.627799];3
-[59.741276, 992.0344];[105.967008, 992.0344];[59.741276, 999.307117];[105.967008, 999.307117];3
-[225.581259, 1064.342753];[299.797623, 1064.342753];[225.581259, 1069.826817];[299.797623, 1069.826817];3
-[59.741276, 1074.354367];[267.006923, 1074.354367];[59.741276, 1081.627084];[267.006923, 1081.627084];3
-[140.141244, 1086.71997];[228.783448, 1086.71997];[140.141244, 1095.840615];[228.783448, 1095.840615];3
-[59.741276, 1102.674356];[102.847006, 1102.674356];[59.741276, 1109.947073];[102.847006, 1109.947073];3
-[59.741276, 1113.075261];[294.113384, 1113.075261];[59.741276, 1131.800507];[294.113384, 1131.800507];3
-[59.741276, 1136.634342];[128.406996, 1136.634342];[59.741276, 1143.907059];[128.406996, 1143.907059];3
-[475.06081, 1107.706734];[572.446712, 1107.706734];[475.06081, 1127.280602];[572.446712, 1127.280602];3
-[530.020788, 1108.319962];[541.863032, 1108.319962];[530.020788, 1117.440606];[541.863032, 1117.440606];3
-[619.060752, 1103.291094];[659.884552, 1103.291094];[619.060752, 1116.107704];[659.884552, 1116.107704];3
-[753.340699, 1094.411098];[797.904584, 1094.411098];[753.340699, 1107.227708];[797.904584, 1107.227708];3
-[753.580699, 1120.06886];[796.50293, 1120.06886];[753.580699, 1133.867697];[796.50293, 1133.867697];3
-[970.900611, 1030.77958];[986.123277, 1030.77958];[970.900611, 1077.215254];[986.123277, 1077.215254];1
-[342.101163, 1164.13107];[349.804985, 1164.13107];[342.101163, 1176.94768];[349.804985, 1176.94768];1
-[483.941106, 1164.25107];[491.644928, 1164.25107];[483.941106, 1177.06768];[491.644928, 1177.06768];1
-[625.54105, 1164.13107];[633.244872, 1164.13107];[625.54105, 1176.94768];[633.244872, 1176.94768];1
-[767.381016, 1164.13107];[775.084838, 1164.13107];[767.381016, 1176.94768];[775.084838, 1176.94768];1
-[1451.619419, 965.229798];[1453.980015, 965.229798];[1451.619419, 971.787009];[1453.980015, 971.787009];1
-[1491.579403, 965.829798];[1527.32109, 965.829798];[1491.579403, 972.387009];[1527.32109, 972.387009];3
-[1547.259381, 965.949798];[1556.667662, 965.949798];[1547.259381, 972.507009];[1556.667662, 972.507009];3
-[1576.659369, 965.829798];[1612.401056, 965.829798];[1576.659369, 972.387009];[1612.401056, 972.387009];3
-[1630.419348, 965.949798];[1640.734494, 965.949798];[1630.419348, 972.507009];[1640.734494, 972.507009];3
-[1451.619419, 977.469793];[1453.980015, 977.469793];[1451.619419, 984.027005];[1453.980015, 984.027005];1
-[1491.579403, 978.189793];[1527.32109, 978.189793];[1491.579403, 984.747004];[1527.32109, 984.747004];3
-[1547.259691, 978.189793];[1556.667972, 978.189793];[1547.259691, 984.747004];[1556.667972, 984.747004];3
-[1576.659669, 978.189793];[1612.401356, 978.189793];[1576.659669, 984.747004];[1612.401356, 984.747004];3
-[1630.899994, 978.189793];[1640.271303, 978.189793];[1630.899994, 984.747004];[1640.271303, 984.747004];3
-[1153.179539, 990.429788];[1157.120954, 990.429788];[1153.179539, 996.986999];[1157.120954, 996.986999];1
-[1167.939533, 989.709788];[1232.481401, 989.709788];[1167.939533, 996.267];[1232.481401, 996.267];3
-[1451.619419, 989.709788];[1453.980015, 989.709788];[1451.619419, 996.267];[1453.980015, 996.267];1
-[1491.579403, 990.429788];[1527.32109, 990.429788];[1491.579403, 996.986999];[1527.32109, 996.986999];3
-[1547.259691, 990.429788];[1556.667972, 990.429788];[1547.259691, 996.986999];[1556.667972, 996.986999];3
-[1576.659669, 990.429788];[1612.401356, 990.429788];[1576.659669, 996.986999];[1612.401356, 996.986999];3
-[1630.899994, 990.429788];[1640.271303, 990.429788];[1630.899994, 996.986999];[1640.271303, 996.986999];3
-[1153.179539, 1002.669783];[1157.120954, 1002.669783];[1153.179539, 1009.226994];[1157.120954, 1009.226994];1
-[1167.939533, 1001.949783];[1326.801563, 1001.949783];[1167.939533, 1008.506995];[1326.801563, 1008.506995];3
-[1451.619419, 1001.949783];[1453.980015, 1001.949783];[1451.619419, 1008.506995];[1453.980015, 1008.506995];1
-[1491.579403, 1002.669783];[1527.32109, 1002.669783];[1491.579403, 1009.226994];[1527.32109, 1009.226994];3
-[1547.259691, 1002.669783];[1556.667972, 1002.669783];[1547.259691, 1009.226994];[1556.667972, 1009.226994];3
-[1576.659669, 1002.669783];[1612.401356, 1002.669783];[1576.659669, 1009.226994];[1612.401356, 1009.226994];3
-[1437.819425, 1014.947386];[1467.73837, 1014.947386];[1437.819425, 1021.146955];[1467.73837, 1021.146955];3
-[1500.4594, 1014.947386];[1518.369774, 1014.947386];[1500.4594, 1021.146955];[1518.369774, 1021.146955];3
-[1268.259493, 1014.947386];[1314.258403, 1014.947386];[1268.259493, 1021.146955];[1314.258403, 1021.146955];3
-[1148.019541, 1079.874365];[1174.925371, 1079.874365];[1148.019541, 1087.147082];[1174.925371, 1087.147082];3
-[846.460661, 1086.251101];[861.068533, 1086.251101];[846.460661, 1099.067711];[861.068533, 1099.067711];3
-[846.460661, 1103.386736];[913.182453, 1103.386736];[846.460661, 1120.77561];[913.182453, 1120.77561];3
-[184.421226, 1151.519945];[250.820238, 1151.519945];[184.421226, 1160.640589];[250.820238, 1160.640589];3
-[200.501243, 1164.25107];[208.205065, 1164.25107];[200.501243, 1177.06768];[208.205065, 1177.06768];1
-[1167.939533, 965.109798];[1198.041153, 965.109798];[1167.939533, 971.667009];[1198.041153, 971.667009];3
-[1167.939533, 977.469793];[1232.481401, 977.469793];[1167.939533, 984.027005];[1232.481401, 984.027005];3
-[908.981001, 1164.13107];[916.684822, 1164.13107];[908.981001, 1176.94768];[916.684822, 1176.94768];1
-[1630.899994, 1002.669783];[1640.271303, 1002.669783];[1630.899994, 1009.226994];[1640.271303, 1009.226994];3
-[1533.819386, 1014.947386];[1613.16964, 1014.947386];[1533.819386, 1021.146955];[1613.16964, 1021.146955];3
-[1621.419351, 1014.947386];[1649.76968, 1014.947386];[1621.419351, 1021.146955];[1649.76968, 1021.146955];3
-[1368.339453, 1026.467381];[1432.329619, 1026.467381];[1368.339453, 1032.666951];[1432.329619, 1032.666951];3
-[141.941243, 1136.87995];[145.224675, 1136.87995];[141.941243, 1146.000595];[145.224675, 1146.000595];1
-[59.741276, 1150.914336];[174.126989, 1150.914336];[59.741276, 1158.187054];[174.126989, 1158.187054];3
-[58.661277, 1164.25107];[66.365098, 1164.25107];[58.661277, 1177.06768];[66.365098, 1177.06768];1
-[550.18078, 1077.731105];[591.004579, 1077.731105];[550.18078, 1090.547714];[591.004579, 1090.547714];3
-[446.140822, 1083.946743];[504.543022, 1083.946743];[446.140822, 1101.46771];[504.543022, 1101.46771];3
-[1153.179539, 965.949798];[1157.120954, 965.949798];[1153.179539, 972.507009];[1157.120954, 972.507009];1
-[1153.179539, 978.189793];[1157.120954, 978.189793];[1153.179539, 984.747004];[1157.120954, 984.747004];1
-[1148.019541, 1037.512985];[1624.438334, 1037.512985];[1148.019541, 1074.96047];[1624.438334, 1074.96047];3
-[1065.351837, 1093.199563];[1087.526593, 1093.199563];[1065.351837, 1157.877165];[1087.526593, 1157.877165];1
-[44.741282, 1086.251101];[54.745166, 1086.251101];[44.741282, 1099.067711];[54.745166, 1099.067711];3
-[1657.180637, 944.531158];[1667.960445, 944.531158];[1657.180637, 957.347768];[1667.960445, 957.347768];3
-[1147.299541, 1014.947386];[1163.082836, 1014.947386];[1147.299541, 1021.146955];[1163.082836, 1021.146955];3
-[842.140651, 1058.052974];[885.813743, 1058.052974];[842.140651, 1074.107185];[885.813743, 1074.107185];3
-[671.752621, 1078.535992];[684.568775, 1078.535992];[671.752621, 1130.879548];[684.568775, 1130.879548];1
-[59.741276, 1064.342753];[130.717626, 1064.342753];[59.741276, 1069.826817];[130.717626, 1069.826817];3
-[976.900609, 938.619617];[991.883275, 938.619617];[976.900609, 957.815302];[991.883275, 957.815302];3
-[782.860736, 1018.310953];[834.843042, 1018.310953];[782.860736, 1039.066935];[834.843042, 1039.066935];3
-[715.180714, 1020.699584];[752.123423, 1020.699584];[715.180714, 1039.895269];[752.123423, 1039.895269];3
-[517.432683, 1043.376041];[530.248836, 1043.376041];[517.432683, 1062.719575];[530.248836, 1062.719575];1
-[234.101206, 1015.805621];[294.334146, 1015.805621];[234.101206, 1038.17417];[294.334146, 1038.17417];3
-[89.021264, 1026.011125];[137.645082, 1026.011125];[89.021264, 1038.827735];[137.645082, 1038.827735];3
-[573.94077, 1004.651134];[614.76457, 1004.651134];[573.94077, 1017.467744];[614.76457, 1017.467744];3
-[354.232748, 1051.385739];[367.048902, 1051.385739];[354.232748, 1116.839553];[367.048902, 1116.839553];1
-[204.221218, 1006.314394];[245.646978, 1006.314394];[204.221218, 1013.587111];[245.646978, 1013.587111];3
-[322.541276, 1013.437387];[341.501917, 1013.437387];[322.541276, 1049.03958];[341.501917, 1049.03958];1
-[59.741276, 1006.314394];[115.566992, 1006.314394];[59.741276, 1013.587111];[115.566992, 1013.587111];3
-[1103.979558, 914.893963];[1165.020547, 914.893963];[1103.979558, 951.706963];[1165.020547, 951.706963];3
-[857.740653, 919.813756];[901.536558, 919.813756];[857.740653, 935.74703];[901.536558, 935.74703];3
-[825.232206, 969.095757];[838.048815, 969.095757];[825.232206, 995.999602];[838.048815, 995.999602];1
-[593.860762, 960.733091];[610.305287, 960.733091];[593.860762, 983.147201];[610.305287, 983.147201];3
-[59.741276, 977.994406];[155.167011, 977.994406];[59.741276, 985.267123];[155.167011, 985.267123];3
-[804.47264, 919.60313];[823.168699, 919.60313];[804.47264, 938.999624];[823.168699, 938.999624];3
-[414.580834, 932.019619];[502.371181, 932.019619];[414.580834, 951.215304];[502.371181, 951.215304];3
-[673.9751, 917.411228];[693.581776, 917.411228];[673.9751, 1006.799597];[693.581776, 1006.799597];1
-[362.740855, 935.051162];[403.564654, 935.051162];[362.740855, 947.867771];[403.564654, 947.867771];3
-[59.741276, 943.07442];[291.286592, 943.07442];[59.741276, 972.840664];[291.286592, 972.840664];3
-[490.552694, 959.136077];[503.368847, 959.136077];[490.552694, 966.839613];[503.368847, 966.839613];3
-[44.381282, 944.531158];[55.16109, 944.531158];[44.381282, 957.347768];[55.16109, 957.347768];3
-[570.952661, 912.936093];[583.768815, 912.936093];[570.952661, 932.159627];[583.768815, 932.159627];3
-[857.740644, 890.774196];[892.181091, 890.774196];[857.740644, 912.466914];[892.181091, 912.466914];3
-[1050.700422, 1164.13107];[1058.404244, 1164.13107];[1050.700422, 1176.94768];[1058.404244, 1176.94768];1
-[1204.779518, 1079.994365];[1226.765291, 1079.994365];[1204.779518, 1087.267082];[1226.765291, 1087.267082];3
-[1158.819536, 1092.359968];[1191.181708, 1092.359968];[1158.819536, 1101.480613];[1191.181708, 1101.480613];3
-[1222.059511, 1093.319968];[1241.341711, 1093.319968];[1222.059511, 1102.440612];[1241.341711, 1102.440612];3
-[1261.419495, 1079.874365];[1277.765169, 1079.874365];[1261.419495, 1087.147082];[1277.765169, 1087.147082];3
-[1278.699489, 1092.479968];[1295.192954, 1092.479968];[1278.699489, 1101.600612];[1295.192954, 1101.600612];3
-[1318.179473, 1079.874365];[1355.645224, 1079.874365];[1318.179473, 1107.547074];[1355.645224, 1107.547074];3
-[1417.299433, 1079.879973];[1486.640378, 1079.879973];[1417.299433, 1089.000618];[1486.640378, 1089.000618];3
-[1417.299433, 1108.319962];[1469.600455, 1108.319962];[1417.299433, 1117.440606];[1469.600455, 1117.440606];3
-[1181.619527, 1123.451086];[1201.583466, 1123.451086];[1181.619527, 1136.267696];[1201.583466, 1136.267696];3
-[1188.459525, 1153.696277];[1255.031485, 1153.696277];[1188.459525, 1159.24017];[1255.031485, 1159.24017];3
-[1192.420397, 1164.13107];[1200.124219, 1164.13107];[1192.420397, 1176.94768];[1200.124219, 1176.94768];1
-[1491.099404, 1116.371089];[1630.083507, 1116.371089];[1491.099404, 1129.187699];[1630.083507, 1129.187699];3
-[1221.459511, 1124.165578];[1280.97267, 1124.165578];[1221.459511, 1135.134131];[1280.97267, 1135.134131];3
-[1417.299433, 1136.632945];[1469.158167, 1136.632945];[1417.299433, 1144.680442];[1469.158167, 1144.680442];3
-[1165.299534, 1139.752944];[1280.536955, 1139.752944];[1165.299534, 1147.800441];[1280.536955, 1147.800441];3
-[1615.779354, 1136.632945];[1638.598109, 1136.632945];[1615.779354, 1144.680442];[1638.598109, 1144.680442];3
-[1498.059401, 1145.771077];[1552.083419, 1145.771077];[1498.059401, 1158.587687];[1552.083419, 1158.587687];3
-[1323.219471, 1153.312939];[1389.616813, 1153.312939];[1323.219471, 1161.360435];[1389.616813, 1161.360435];3
-[1330.300378, 1164.13107];[1345.684232, 1164.13107];[1330.300378, 1176.94768];[1345.684232, 1176.94768];3
-[1657.540637, 1086.251101];[1667.544521, 1086.251101];[1657.540637, 1099.067711];[1667.544521, 1099.067711];3
-[1491.099404, 1087.091101];[1615.083514, 1087.091101];[1491.099404, 1099.907711];[1615.083514, 1099.907711];3
-[1472.020711, 1164.25107];[1487.404566, 1164.25107];[1472.020711, 1177.06768];[1487.404566, 1177.06768];3
-[1622.499425, 1147.325569];[1645.892419, 1147.325569];[1622.499425, 1158.294122];[1645.892419, 1158.294122];3
-[1612.900791, 1164.25107];[1629.964553, 1164.25107];[1612.900791, 1177.06768];[1629.964553, 1177.06768];3

+ 0 - 47
old/cluster_by_distance_csv_only.py

@@ -1,47 +0,0 @@
-#go through csv file, speichern x und y
-#dann loop durch alle anderen x und y:
-#    if abstand x weniger als
-#    if abstand y weniger als
-#    dann selber cluster
-#    alles in ein file/variable speichern und werte löschen??? oder markieren als bereits geclustert
-#    alles durchgehen
-#wenn alles durch dann neues x und y und nochmal alles von vorn
-
-
-import csv
-
-csvfile1 = open('values_LH.csv', 'r')
-spamreader1 = list(csv.reader(csvfile1, dialect='excel', delimiter=','))
-csvfile1.close()
-already_merged=False
-new_rows_list = []
-with open("values_LH.csv", "r") as csvfile:
-    reader = csv.reader(csvfile, delimiter=',')
-    for row in reader:
-        #print(row)
-        x = row[5]
-        #print(x)
-        y = row[6]
-        #print(y)
-        for row1 in spamreader1:
-            #print(row1)
-            x1 = row1[5]
-            #print(x1)
-            y1 = row1[6]
-            #print(abs(float(x1) - float(x)))
-            #print(abs(float(y1) - float(y)))
-            if (abs(float(x1) - float(x)) < 30.0) and (abs(float(y1) - float(y)) < 5.0): # and row[7] == False:
-                #print(row)
-                row[4] = row[4] + " " + row1[4]
-                #print(row[4])
-                row[7] = True
-                new_row = [row[0], row[1], row[2],row[3],row[4],row[5],row[6]] #write all values, including new merged text
-                print(new_row)
-                new_rows_list.append(new_row)
-csvfile.close()
-
-
-file2 = open("merged_values.csv", 'w')
-writer = csv.writer(file2)
-writer.writerows(new_rows_list)
-file2.close()

+ 0 - 29
old/csv_to_text.py

@@ -1,29 +0,0 @@
-import pandas
-import csv
-import re
-#data_df = pandas.read_csv("values_LH.csv", sep=",")
-#print(data_df.head(3))
-
-#data = data_df[["X1","Y1","X2","Y2"]]
-#print(data)
-
-
-def read_csv(file):
-    text = []
-    with open(file, 'r') as csvFile:
-        reader = csv.reader(csvFile, delimiter=",")
-        for row in reader:
-            text.append(row[2])
-    csvFile.close()
-    ###extract ISOs
-    matches = []
-    regex = r"(ISO\s\d\d\d\d?\W?\d?\W?\d?\W?\d?)"
-    for line in text:
-        match = re.findall(regex, line)
-        if match:
-            matches.append(match)
-
-    print(matches)
-
-
-    return text

+ 0 - 120
old/dbscan_clustering.py

@@ -1,120 +0,0 @@
-import numpy as np
-import pandas
-import csv
-import order_bounding_boxes_in_each_block
-
-from sklearn.cluster import DBSCAN
-from sklearn import metrics
-from sklearn.datasets.samples_generator import make_blobs
-from sklearn.preprocessing import StandardScaler
-
-def my_distance(x,y):
-    blub = "ddd"
-    return blub
-
-
-def cluster(file_in, file_out):
-    # #############################################################################
-    data_df = pandas.read_csv("/home/bscheibel/PycharmProjects/engineering_drawings_extraction/temporary/list_to_csv_with_avg_points.csv", sep=";")
-    data_df.head(3)
-    data = data_df[["xavg_elem","yavg_elem","ausrichtung"]]
-    #print(data)
-    data = StandardScaler().fit_transform(data)
-
-    # #############################################################################
-    # Compute DBSCAN
-    db = DBSCAN(eps=0.075, min_samples=1, metric="euclidean").fit(data)
-    #core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
-    #core_samples_mask[db.core_sample_indices_] = True
-    labels = db.labels_
-    print(data[labels == 0])
-    data_df["cluster"] = labels
-
-    # Number of clusters in labels, ignoring noise if present.
-    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
-    n_noise_ = list(labels).count(-1)
-
-    print('Estimated number of clusters: %d' % n_clusters_)
-    print('Estimated number of noise points: %d' % n_noise_)
-    print("Silhouette Coefficient: %0.3f"
-          % metrics.silhouette_score(data, labels))
-
-    # #############################################################################
-    # Plot result
-    """ort matplotlib.pyplot as plt
-
-    # Black removed and is used for noise instead.
-    unique_labels = set(labels)
-    colors = [plt.cm.Spectral(each)
-              for each in np.linspace(0, 1, len(unique_labels))]
-    for k, col in zip(unique_labels, colors):
-        if k == -1:
-            # Black used for noise.
-            col = [0, 0, 0, 1]
-
-        class_member_mask = (labels == k)
-
-        xy = data[class_member_mask & core_samples_mask]
-        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
-                 markeredgecolor='k', markersize=14)
-
-        xy = data[class_member_mask & ~core_samples_mask]
-        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
-                 markeredgecolor='k', markersize=6)
-
-    plt.title('Estimated number of clusters: %d' % n_clusters_)
-    plt.show()"""
-
-    #print(data_df.head(3))
-    #data_df.to_csv("values_clusteredfromPDF_GV12.csv")
-    data_df.groupby('cluster')['element'].apply(' '.join).reset_index().to_csv("values_clusteredfromHTML_layout_LH.csv",sep=";")
-
-
-def get_average_xy(list_input):
-    csv_name = "temporary/list_to_csv_with_avg_points.csv"
-    new_list = []
-    resultFile = open(csv_name, 'w')
-    wr = csv.writer(resultFile, delimiter=";")
-    wr.writerow(["element", "xavg_elem","yavg_elem", "ausrichtung"])
-    for element in list_input:
-        xavg_elem = 0
-        yavg_elem = 0
-        y_min = 1000000
-        y_max = 0
-        x_min = 1000000
-        x_max = 0
-        for blub in element:
-            xavg_elem += (float(blub[0]) + float(blub[2]))/2
-            yavg_elem += (float(blub[1]) + float(blub[3]))/2
-            if float(blub[1]) < y_min:
-                y_min = float(blub[1])
-                #print("y_min:",y_min)
-            if float(blub[0]) < x_min:
-                x_min = float(blub[0])
-            if float(blub[3]) > y_max:
-                y_max = float(blub[3])
-            if float(blub[2]) > x_max:
-                x_max = float(blub[2])
-        if x_max-x_min > y_max-y_min:
-            ausrichtung = 0
-        else:
-            ausrichtung = 1
-        xavg_elem = xavg_elem/len(element)
-        #print(xavg_elem)
-        yavg_elem = yavg_elem/len(element)
-        #element.extend([xavg_elem, yavg_elem])
-        #print(element)
-        #new_list.append(element)
-        wr.writerow([element,xavg_elem,yavg_elem, ausrichtung])
-
-    resultFile.close()
-    #print(new_list)
-    return csv_name
-
-
-#cluster(33,33)
-file = "/home/bscheibel/PycharmProjects/engineering_drawings_extraction/drawings/5152166_Rev04.html"
-#file = "/home/bscheibel/PycharmProjects/engineering_drawings_extraction/drawings/5129275_Rev01-GV12.html"
-#result = order_bounding_boxes_in_each_block.get_bound_box(file)
-#get_average_xy(result)
-cluster(33,33)

+ 0 - 34
old/dxf_line_reader.py

@@ -1,34 +0,0 @@
-
-def printpoint(b):
-    print(b)
-    obj = dict(zip(b[::2], b[1::2]))
-    try:
-        if obj['100'] == 'AcDbMText':
-            print('{}'.format(obj['0']))
-    except:
-        pass
-
-buffer = ['0', 'fake']
-filepath = '../drawings/GV_12.DXF'
-with open(filepath,'r') as fp:
-    line = fp.readline()
-    cnt = 1
-    while line:
-        line = fp.readline()
-    #line = line.rstrip()
-        print(line)
-        if line == '0':  # we've started a new section, so
-            print("Line {}: {}".format(cnt, line.strip()))
-            try:
-                printpoint(buffer)  # handle the captured section
-            except:
-                print("ERROR")
-
-    #buffer = []  # and start a new one
-    #buffer.append(line)
-    cnt += 1
-fp.close()
-
-#printpoint(buffer)        # buffer left over from last pass through loop
-
-#https://leancrew.com/all-this/2016/12/dxf-data-extraction/

+ 0 - 24
old/dxf_reader.py

@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-
-#from fileinput \
-import fileinput
-
-
-def printpoint(b):
-    print(b)
-    obj = dict(zip(b[::2], b[1::2]))
-    if obj['0'] == 'AcDbMText':
-        print('{}'.format(obj['0']))
-
-
-#print('Code','Text')# header line
-buffer = ['0', 'fake']    # give first pass through loop something to process
-for line in fileinput.input("../drawings/GV_12.DXF"):
-    line = line.rstrip()
-    #print(line)
-    if line == '0':         # we've started a new section, so
-        printpoint(buffer)      # handle the captured section
-        buffer = []             # and start a new one
-    buffer.append(line)
-
-printpoint(buffer)        # buffer left over from last pass through loop

+ 0 - 18
old/find_page_in_pdf.py

@@ -1,18 +0,0 @@
-import PyPDF2
-import re
-
-# Open the pdf file
-object = PyPDF2.PdfFileReader("iso_documents/ISO8015.PDF")
-
-# Get number of pages
-NumPages = object.getNumPages()
-
-# Enter code here
-String = "Inhalt"
-
-# Extract text and do the search
-for i in range(0, NumPages):
-    PageObj = object.getPage(i)
-    Text = PageObj.extractText()
-    if re.search(String,Text):
-         print("Pattern Found on Page: " + str(i))

+ 0 - 10
old/main.py

@@ -1,10 +0,0 @@
-from old import read_text_lines_from_dxf, read_data
-
-#import merge_pandas
-
-
-
-file = "drawings/5152166.dxf"
-file_out = "5152166_extracted.csv"
-read_text_lines_from_dxf.read(file, file_out)
-read_data.read_dimensions(file_out, 0)

+ 0 - 43
old/merge_lines.py

@@ -1,43 +0,0 @@
-import csv
-
-## open CSV file and rea it
-myfile  = open('text.csv', "r")
-reader = csv.reader(myfile, delimiter=";")
-## create an empty dictionary
-mydictionary = {}
-
-rownum = 0
-
-for row in reader:
-    ## check if it is the header
-    if rownum == 0:
-        pass
-    else:
-        ## split the line of CSV in elements..Use the name for the key in dictionary and the other two in a list
-        #line = row.split(";")
-        #print(row)
-        text = row[0]
-        #print(text)
-        x = row[1]
-        y = row[2]
-
-        if x in mydictionary:
-            mydictionary[text][1] += text
-            print(mydictionary[text][1] )
-        else:
-            mydictionary[text] = [x,y]
-
-    rownum += 1
-
-myfile.close()
-
-## create a new list of lists with the data from the dictionary
-newcsvfile = ["text","x","y"]
-
-for i in mydictionary:
-    newcsvfile.append(mydictionary[i])
-
-## write the new list of lists in a new CSV file
-with open("output.csv", "wb") as f:
-    writer = csv.writer(f)
-    writer.writerows(newcsvfile)

+ 0 - 7
old/merge_pandas.py

@@ -1,7 +0,0 @@
-import pandas
-
-def merge_lines(file_out):
-    df = pandas.read_csv(file_out, header = 0, delimiter=";")
-    df['Text'] = df.groupby(['X','Y'])['TEXT'].transform('sum')
-    df.drop_duplicates()
-    df.to_csv(file_out)

+ 0 - 43
old/ocr_test.py

@@ -1,43 +0,0 @@
-import PyPDF2
-from tika import parser
-from nltk.tokenize import word_tokenize
-from nltk.corpus import stopwords
-import nltk
-nltk.download('stopwords')
-
-
-#write a for-loop to open many files -- leave a comment if you'd #like to learn how
-filename = "../drawings/GV_12.PDF"
-#open allows you to read the file
-pdfFileObj = open(filename,'rb')
-#The pdfReader variable is a reada2ble object that will be parsed
-pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
-#discerning the number of pages will allow us to parse through all #the pages
-num_pages = pdfReader.numPages
-count = 0
-text = ""
-#The while loop will read each page
-while count < num_pages:
-    pageObj = pdfReader.getPage(count)
-    count +=1
-    text += pageObj.extractText()
-#This if statement exists to check if the above library returned #words. It's done because PyPDF2 cannot read scanned files.
-if text != "":
-   text = text
-#If the above returns as False, we run the OCR library textract to #convert scanned/image based PDF files into text
-else:
-    raw = parser.from_file("../drawings/GV_12.PDF")
-    raw = str(raw)
-    safe_text = raw.encode('utf-8', errors='ignore')
-    text = str(safe_text).replace("\n", "").replace("\\", "")
-    print(raw)
-
-#The word_tokenize() function will break our text phrases into #individual words
-tokens = word_tokenize(text)
-#we'll create a new list which contains punctuation we wish to clean
-punctuations = ['(',')',';',':','[',']',',']
-#We initialize the stopwords variable which is a list of words like #"The", "I", "and", etc. that don't hold much value as keywords
-stop_words = stopwords.words('english')
-#We create a list comprehension which only returns a list of words #that are NOT IN stop_words and NOT IN punctuations.
-keywords = [word for word in tokens if not word in stop_words and not word in punctuations]
-print(keywords)

+ 0 - 131
old/read_data.py

@@ -1,131 +0,0 @@
-import csv
-import re
-
-
-def read_dimensions(file_out, num):
-    toleranzen_included = False
-    
-    with open(file_out) as csv_file:
-        csv_reader = csv.reader(csv_file, delimiter=';')
-        for row in csv_reader:
-            #print("test")
-            if row[num] == "-" or row[num] == "+":
-                toleranzen_included = True
-            #    print("test2")
-
-    with open(file_out) as csv_file:
-        csv_reader = csv.reader(csv_file, delimiter=';')
-        line_count = 0
-        durchmesser = False
-        vorzeichen1 = "nix"
-        is2vorzeichen = False
-        vorzeichen2 = "nix"
-        isos = []
-        dimensions = []
-
-
-
-
-
-        for row in csv_reader:
-            #print(row)
-            line_count += 1
-            #print(line_count)
-
-            if "ISO" in row[num]:
-                isos.append(row[num])
-
-            if durchmesser:
-                #print("Durchmesser: " + row[1])
-                dimensions.append("Durchmesser: " + row[num])
-                durchmesser = False
-                continue
-
-            if row[num] == "%%c":
-                durchmesser = True
-                continue
-
-            if vorzeichen1 != "nix" and (row[num] == "-" or row[num] == "+"):
-                is2vorzeichen = True
-                vorzeichen2 = row[num]
-                continue
-            if (row[num] == "-" or row[num] == "+") and ismaybenumber:
-                dimensions.append(ismaybenumber[0])
-
-            if row[num] == "-" or row[num] == "+":
-                vorzeichen1 = row[num]
-                continue
-
-            if "%%c<>" in row[num]:
-                continue
-
-            if "2x %%c" in row[num]:
-                continue
-
-            if "16%" in row[num]:
-                continue
-            if "R" in row[num][0] and not "Rz" in row[num]:
-                dimensions.append("Radius: " + row[num][1:])
-                continue
-
-            isnumber = re.findall(r"\d*\,\d+|\d*\.\d+", row[num][0:6]) #regex to get number from line
-            ismaybenumber = re.findall(r"^[0-9]+$",row[num])
-            if isnumber:
-                #print(isnumber)
-                if vorzeichen1 != "nix":
-                    if vorzeichen1 == "-" and (row[num] == "0,00" or row[num] == "0,0"):
-                        dimensions.append(row[num])
-                    else:
-                    #print(vorzeichen + isnumber[0])
-                        dimensions.append(vorzeichen1 + isnumber[0])
-                        vorzeichen1 = "nix"
-                else:
-                    if row[num][0]!="?":
-                        #print(isnumber[0])
-                        dimensions.append(isnumber[0])
-                if is2vorzeichen is True:
-                    vorzeichen1 = vorzeichen2
-                    is2vorzeichen = False
-                    vorzeichen2 = "nix"
-
-
-            if row[num][0] == "?" and row[num][1].isdigit():
-                #print("+/- " + row[1][1:])
-                dimensions.append("+/- " + row[num][1:])
-
-        if isos.__len__()>0:
-            print(isos)
-        else:
-            print("No regulations found.")
-        #print(dimensions)
-        print(f'Processed {line_count} lines.')
-
-        dim = []
-        dim_count = 0
-        if not toleranzen_included:
-            print("Maße")
-        for x in dimensions:
-            if not toleranzen_included:
-                print(x)
-                continue
-            if x == "Durchmesser: ":
-                dim_count = 0
-            if dim_count > 2:
-                dim_count = 0
-            if dim_count == 0:
-                print("Maße: " + "\n" + x)
-                dim_count += 1
-                continue
-            if dim_count == 1:
-                print("Toleranzen: " + "\n" + x)
-                dim_count += 1
-                if "+/-" in x:
-                    dim_count += 1
-                continue
-            if dim_count == 2:
-                print(x)
-                dim_count += 1
-                continue
-
-
-

+ 0 - 34
old/read_html_to_csv.py

@@ -1,34 +0,0 @@
-import re
-import csv
-
-
-with open("drawings/5129275_Rev01-GV12.html", "r") as f:
-    with open('values_fromhtml_GV12.csv', 'w') as writeFile:
-        for line in f.readlines():
-            #print(line)
-            row = []
-            if "<word" in line:
-                exMin = r"xMin=\"(\d*\.?\d*)"
-                exMin = re.findall(exMin,line)[0]
-                row.append(exMin)
-                eyMin = r"yMin=\"(\d*\.?\d*)"
-                eyMin = re.findall(eyMin,line)[0]
-                row.append(eyMin)
-                exMax = r"xMax=\"(\d*\.?\d*)"
-                exMax = re.findall(exMax,line)[0]
-                row.append(exMax)
-                eyMax = r"yMax=\"(\d*\.?\d*)"
-                eyMax = re.findall(eyMax,line)[0]
-                row.append(eyMax)
-                Text = r">(.+)<" #wieso wird was mit "" extrahiert???
-                Text = re.findall(Text,line)[0]
-                row.append(Text.replace(',','.'))
-                avgX=(float(exMin)+float(exMax))/2.0
-                row.append(avgX)
-                avgY=(float(eyMin)+float(eyMax))/2.0
-                row.append(avgY)
-                row.append(False)
-                writer = csv.writer(writeFile)
-                writer.writerow(row)
-    writeFile.close()
-

+ 0 - 17
old/read_pdf.py

@@ -1,17 +0,0 @@
-# from tika import parser
-#
-# raw = parser.from_file("GV_12.pdf")
-# raw = str(raw)
-#
-# safe_text = raw.encode('utf-8', errors='ignore')
-#
-# #safe_text = str(safe_text).replace("\n", "").replace("\\", "")
-# print('--- safe text ---' )
-# print(safe_text
-#
-# )
-
-
-import textract
-text = textract.process("../drawings/GV_12.PDF")
-print(text)

+ 0 - 33
old/read_text_lines_from_dxf.py

@@ -1,33 +0,0 @@
-import csv
-import math
-
-def printsection(buffer, file_out):
-    obj = dict(zip(buffer[::2], buffer[1::2]))
-    for keys, values in obj.items():
-        if keys == '1':
-            try:
-                row = [values, math.floor(float(obj['10'])),math.floor(float(obj['20']))]
-                with open(file_out, 'a') as csvFile:
-                    writer = csv.writer(csvFile, delimiter =';')
-                    if row[0] != '':
-                        writer.writerow(row)
-
-                csvFile.close()
-            except:
-                print("ERROR")
-
-
-def read(file, file_out):
-    buffer = []
-    file = open(file, "r")
-    for line in file:
-        line = line.strip()
-        #print(line)
-        if line == '100':
-            printsection(buffer, file_out)
-            buffer = []
-        buffer.append(line)
-    printsection(buffer, file_out)
-
-
-read("../drawings/sample.DXF", "sample.csv")

+ 0 - 121
old/regex_extraction.py

@@ -1,121 +0,0 @@
-# coding=utf8
-import re
-import pandas
-
-def clean(extracted_dimensions):
-    #next part extracts the isos and removes everything we dont need like just text or detail/maßstab, einzelne buchstaben und zahlen
-    isos = []
-    for line in extracted_dimensions:
-        matches = re.findall(regex_isos,line)
-        for match in matches:
-            isos.append(match)
-
-
-    i = 0
-    new_matches = []
-    for match in extracted_dimensions:
-        match = match.split('\n')[0]
-        if not re.search(reg_all, match):
-            new_matches.append(match)
-        i += 1
-
-    #print(isos)
-    #print(extracted_dimensions)
-    return isos, new_matches
-
-
-def print_clean(dims):
-    dims_new = []
-    dimss = []
-    #or dim in dims:
-    #    if "CT" in dim:
-    #        dim = re.split("CT",dim)
-    #        for di in dim:
-    #            dimss.extend(di)
-    #print(dimss)
-    for dim in dimss:
-        if re.search(r"b\s\d*\W?\d*\s.",dim):
-            dim = dim.replace('b', u"\u27C2")
-        if re.search(r"g\s\d*\W?\d*", dim):
-            dim = dim.replace('g', u"\u232D")
-        if re.search(r"f\s\d*\W?\d*", dim):
-            dim = dim.replace('f',  u"\u2225")
-        if re.search(r"r\s\d*\W?\d*", dim):
-            dim = dim.replace('r', u"\u25CE")
-        if re.search(r"i\s\d*\W?\d*", dim):
-            dim = dim.replace('i', u"\u232F")
-        if re.search(r"j\s\d*\W?\d*", dim):
-            dim = dim.replace('j', u"\u2316")
-        if re.search(r"d\s\d*\W?\d*", dim):
-            dim = dim.replace('d', u"\u2313")
-        if re.search(r"c\s+\d*", dim):
-            dim = dim.replace('c', u"\u23E5")
-        if re.search(r"n\s+\d*", dim):
-            dim = dim.replace('n', u"\u2300")
-        if "È" in dim:
-            dim = dim.replace('È', 'GG')
-        if "`" in dim:
-            dim = dim.replace('`', u"\u00B1")
-        if "#" in dim:
-            dim = dim.replace('#', "↔")
-        if "⌀" in dim:
-            dim = dim.replace('⌀', "Ø")
-        reg12 = re.compile(r"(\d{1,2}\.?\d{0,2})\s\+\s-\s(\d{1,2}\.?\d{0,2})\s(\d{1,2}\.?\d{0,2})") ##???? was machst du?? nach toleranzen suchen, mit +/- blabla
-        g = re.search(reg12, dim)
-        if g:
-            dim = re.sub(reg12, g.group(1) + " + " + g.group(2) + " - " + g.group(3), dim) # +/- toleranzen schön darstellen
-        dims_new.append(dim.strip())
-        dimms = []
-        i = 0
-        for dim in dims_new:
-            last_item = i - 1
-            next_item = i + 1
-            if not re.search(r"[a-zA-Z]{3,}|^\d\s\d$|^[a-zA-Z]{2,}\d.*$",dim) and not dim == "-":
-                dimms.append(dim)
-
-
-        ####nicht dabei: neigungswinkel und lauftoleranzen
-    print(dimms)
-    return dimms
-
-
-
-regex = r"(\S+\s{1,3}?\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s?\S*\S*\s+)" #alle gruppen von zahlen raus
-regex1 = r"([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)" #ti get the bezeichnungen raus
-regex2 = r"((?!\d)(?!Rpk)[a-zA-Z]{3,})" #alle wörter raus??? außer Rpk
-regex_isos = r"(ISO\s\d\d\d\d?\W?\d?\W?\d?\W?\d?)|(EN\s\d*)" #get iso standards
-reg = r"(^\d{1}$)" #einzelne Zahlen raus #checked
-reg1 = r"(^[A-Z]{1}-?[A-Z]?$)" #einzelne Buchstaben raus #checked
-reg_all = re.compile(r"(ISO\s\d\d\d\d?\W?\d?\W?\d?\W?\d?|(EN\s\d*)|^[A-Z]{1}-?[A-Z]?\s*$)|([A-Z]\W?[A-Z]?\s?\W\s?\d\d?\s?\s?:\s?\d\d?\s?\W)|((?!\d)(?!Rpk)[a-zA-Z]{3,}?\W)|(?!0)(^\d{1}\s*$|A\d{1}|\d\s\d\s\d\s\d\s\d)|BY|to:?|of|or|is|in|as|be|by |\d\d\d\d\d\d\d|\d\s\/\s\d")
-extracted_dimensions = []
-#text = csv_to_text.read_csv('/home/bscheibel/PycharmProjects/engineering_drawings_extraction/temporary/text_merged_GV12.csv')
-
-#file = open('values_clusteredfromPDF_GV12.csv', 'r')
-#text = file.read()
-#file.close()
-#text_df = pandas.read_csv(file)
-
-def extract_pretty(input):
-    #text = input['element']
-    text_all = []
-    for key, value in input:
-        text_combined = ""
-        #new_arr = ""
-       # print(element)
-        element = eval(element)
-        for x in element:
-            text_combined += x[4] + " "
-            #print(x[4])
-        text_all.append(text_combined)
-    #print(text_all)
-
-    #for line in text_combined:
-    #    extracted_dimensions.append(line.strip())
-
-    isos, dims = clean(text_all)
-    #print(isos)
-    #isos, dims = clean(dims)
-    new_dims = print_clean(dims)
-    #for dim in dims:
-    #    print(dim)
-    return new_dims

+ 0 - 91
old/test.csv

@@ -1,91 +0,0 @@
-LAYOUT;420;297
-Concepts, principales and rules according to:;0;2
-Dimensions according to:;0;-1
-Tolerances of form, orientation, location and run-out:;0;-3
-Edge finish according to:;0;-6
-Surface texture according to:;0;-9
-Limits according to:;0;-12
-16% rule:;0;-15
-ISO 8015;0;2
-ISO 14405 1-3;0;-1
-ISO 1101;0;-3
-ISO 13715;0;-6
-ISO 1302;0;-9
-ISO 286-2;0;-12
-not applicable;0;-15
-Rz 16;4;3
-2x;163;195
-%%c;163;199
-19,20;163;203
--;163;210
-0,00;160;211
-0,05;163;211
-%%c;152;197
-15,15;152;200
-+;149;208
-0,05;149;209
-0,00;152;209
-1,5;87;182
-+;92;185
-0,1;93;185
-0,0;93;182
-4,8;86;170
-?0,05;91;170
-2;113;179
-+;116;182
-0,1;117;182
-0,0;117;179
-9,3;112;170
-+;117;173
-0,1;119;173
-0,0;119;170
-%%c;140;158
-4,5;140;161
-+;137;167
-+;140;167
-0,040;137;168
-0,028;140;168
-A;17;31
-A;202;31
-B;17;75
-B;202;75
-C;17;125
-C;202;125
-D;17;175
-D;202;175
-E;17;225
-E;202;225
-F;17;269
-F;202;269
-4;39;9
-4;39;291
-3;84;9
-3;84;291
-2;134;9
-2;134;291
-1;179;9
-1;179;291
-BEATE GV12;160;42
-GV_12;152;22
-A4;191;24
-Page 1/1;164;12
-Scale: 2:1;120;12
-Drawing nr.:;120;25
-Designation:;120;46
-Surface material/treatment:;21;47
-Material: 1.4021;20;64
-Surface Texture:;65;64
-Hardened to 48 HRC +2/-0;86;47
-Quality Standard: EN 10095;20;17
-A;130;108
-A;91;107
-2x %%c <>;165;225
-%%c<>;155;221
-<>;101;180
-<>;108;167
-<>;105;177
-<>;108;167
-%%c<>;143;210
-Layout1;0;0
-Layout2;0;0
-Model;0;0

+ 0 - 36
read_iso_tables/foo-page-3-table-1.csv

@@ -1,36 +0,0 @@
-"Toleran
-Kurzzeichen","zklasse
-Benennung","von 0,5^)
-bis 3
-o
-o","über 3
-bis 6
-CO","Grenzal
-über 6
-bis 30","bmaße für 1
-über 30
-bis 120","Nennmaßb
-über 120
-bis 400","ereiche
-über 400
-bis 1000","über 1000
-bis 2000","über 2000
-bis 4000"
-"f","fein","CO
-±0,05
-+1
-+1","±0,05","±0,1","±0,15","±0,2","±0,3","±0,5","—"
-"m","mittel","±0,1","±0,1","±0,2","±0,3","±0,5","±0,8","± 1,2","±2"
-"c","grob","±0,2","±0,3","±0,5","±0,8","±1,2","±2","±3","±4"
-"V
-±4
-±6
-±2,5
-±8
-sehr grob
-—
-±0,5
-±1
-±1,5","","","","","","","","",""
-"^) Für Nennmaße unter 0,5 mm sind die Grenzabmaße direkt an dem (den) entsprechenden Nennmaß(en) anzugeben.","","","","","","","","",""

+ 0 - 15
read_iso_tables/foo-page-3-table-2.csv

@@ -1,15 +0,0 @@
-"Toleran
-Kurzzeichen","zklasse
-Benennung","Grei
-von 0,5^) bis 3
-±0,2","izabmaße für Nennmaßbere
-über 3 bis 6
-±0,5","iche
-über 6
-± 1"
-"f","fein","","",""
-"m","mittel","±0,4","± 1","±2"
-"c","grob","","",""
-"V
-sehr grob","","","",""
-"^) Für Nennmaße unter 0,5 mm sind die Grenzabmaße direkt an dem (den) entsprechenden Nennmaß(en) anzugeben.","","","",""

+ 0 - 20
read_iso_tables/foo-page-3-table-3.csv

@@ -1,20 +0,0 @@
-"Toleran
-Kurzzeichen","zklasse
-Benennung","bis 10","Grenzabmaße fC
-kürzeren Sch(
-über 10
-bis 50","ir Längenbereichi
-3nkel des betreffe
-über 50
-bis 120","e, in mm, für den
->nden Winkels
-über 120
-bis 400","über 400"
-"f","fein","± 1°","± 0° 30'","","±0° 10'","±0°5'"
-"m","mittel","","","","",""
-"c","grob","","± 1°","± 0° 30'","±0° 15'","±0° 10'"
-"V
-±2°
-±1°
-±0° 20'
-sehr grob","","","","","",""

+ 0 - 67
read_iso_tables/output_mit_camelot-page-3-table-1.csv

@@ -1,67 +0,0 @@
-"Toleran
-zklasse
-Benennung
-Kurzzeichen","","Nennmaßb
-ereiche
-Grenzal
-bmaße für 1
-über 1000
-von 0,5^)
-über 120
-über 400
-über 3
-über 6
-über 30
-über 2000
-bis 6
-bis 2000
-bis 3
-bis 400
-bis 1000
-bis 4000
-bis 30
-bis 120
-CO
-o
-o","","","","","","",""
-"f
-fein","","CO
-±0,3
-±0,5
-±0,05
-±0,2
-—
-±0,05
-±0,1
-±0,15
-+1
-+1","","","","","","",""
-"m
-mittel","","±0,8
-±2
-±0,1
-±0,2
-±0,5
-±0,1
-±0,3
-± 1,2","","","","","","",""
-"c
-grob","","±3
-±1,2
-±2
-±4
-±0,2
-±0,3
-±0,5
-±0,8","","","","","","",""
-"V
-sehr grob","","±4
-±6
-±2,5
-±8
-—
-±0,5
-±1
-±1,5","","","","","","",""
-"^) Für Nennmaße unter 0,5 mm sind die Grenzabmaße direkt an dem (den) entsprechenden Nennmaß(en) anzugeben.","","","","","","","","",""

+ 0 - 18
read_iso_tables/output_mit_camelot-page-3-table-2.csv

@@ -1,18 +0,0 @@
-"Toleran
-zklasse
-Benennung
-Kurzzeichen","","iche
-Grei
-izabmaße für Nennmaßbere
-über 6
-von 0,5^) bis 3
-über 3 bis 6","",""
-"f
-fein","","±0,2","±0,5","± 1"
-"m
-mittel","","","",""
-"c
-grob","","±0,4","± 1","±2"
-"V
-sehr grob","","","",""
-"^) Für Nennmaße unter 0,5 mm sind die Grenzabmaße direkt an dem (den) entsprechenden Nennmaß(en) anzugeben.","","","",""

+ 0 - 32
read_iso_tables/output_mit_camelot-page-3-table-3.csv

@@ -1,32 +0,0 @@
-"Toleran
-zklasse
-Benennung
-Kurzzeichen","","ir Längenbereichi
-e, in mm, für den
-Grenzabmaße fC
->nden Winkels
-kürzeren Sch(
-3nkel des betreffe
-bis 10
-über 120
-über 400
-über 10
-über 50
-bis 50
-bis 400
-bis 120","","","",""
-"fein
-f","","± 1°
-± 0° 30'
-±0° 10'","","","","±0°5'"
-"m
-mittel","","","","","",""
-"c
-grob","","±0° 10'
-± 1°
-± 0° 30'
-±0° 15'","","","",""
-"V
-sehr grob","","±2°
-±1°
-±0° 20'","","","",""

+ 0 - 8
read_iso_tables/pdf_table_extractor.py

@@ -1,8 +0,0 @@
-import camelot
-import matplotlib.pyplot as plt
-tables = camelot.read_pdf('/home/bscheibel/PycharmProjects/engineering_drawings_extraction/iso_documents/ISO2768-1.PDF', pages="3",line_scale=70, line_tol=2, joint_tol=35)
-tables.export('foo.csv', f='csv')
-print(tables[0].df)
-camelot.plot(tables[0], kind='grid')
-plt.show()
-print(tables[3])

+ 0 - 46
read_iso_tables/read_isos.py

@@ -1,46 +0,0 @@
-import nltk
-import re
-from tika import parser
-einleitung = False
-raw = parser.from_file('/home/bscheibel/PycharmProjects/engineering_drawings_extraction/iso_documents/ISO2768-1.PDF')
-#raw = parser.from_file('iso_documents/ISO286-2.PDF')
-print(raw['content'])
-#text = raw['content']
-#sent_text = nltk.sent_tokenize(text)
-#tokenized_text = nltk.word_tokenize(sent_text.split)
-#tagged = nltk.pos_tag(tokenized_text)
-#match = text.concordance('Toleranz')
-#for text in sent_text:
-#    if "Toleranz" in text and einleitung is True:
-#        print(text)
-#    if "Einleitung" in text:
-#        einleitung = True
-
-
-import subprocess
-#subprocess.check_output(['ls','-l']) #all that is technically needed...
-cmd = 'pdftotext -layout "/home/bscheibel/PycharmProjects/engineering_drawings_extraction/iso_documents/ISO8015.PDF"'
-print(subprocess.Popen(cmd, shell=True))
-
-#convert iso document to text
-text = "iso_documents/ISO8015.txt"
-#search for table of content with regex
-contents = []
-regex = r"(.*?)[\W]+(\d+)(?=\n|$)"
-r"([^\.]\d\.?\d?\.?\d?\.?\d?)\s([a-zA-Z]*)\s([a-zA-Z]*)\s*([a-zA-Z]*)\W?\s*([a-zA-Z]+)\s*\.{10,}([\d]+)"
-matches = re.finditer(regex, text, re.MULTILINE)
-#contents = re.findall(r"(.*?)[\W]+(\d+)(?=\n|$)", text, flags=re.M)
-#print(contents)
-for matchNum, match in enumerate(matches, start=1):
-
-    print("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum=matchNum, start=match.start(),
-                                                                        end=match.end(), match=match.group()))
-
-    for groupNum in range(0, len(match.groups())):
-        groupNum = groupNum + 1
-
-        print("Group {groupNum} found at {start}-{end}: {group}".format(groupNum=groupNum, start=match.start(groupNum),
-                                                                        end=match.end(groupNum),
-                                                                        group=match.group(groupNum)))
-#only search for sections with toleranzen/abmaße
-

+ 0 - 37
read_iso_tables/read_tables.py

@@ -1,37 +0,0 @@
-import tabula
-import camelot
-import subprocess
-import re
-
-#tables = tabula.read_pdf("iso_documents/ISO2768-1.PDF", pages=3)
-#for table in tables:
-#    print(table)
-
-#pdftotext - layout!!!!
-
-#tabula.convert_into("iso_documents/ISO2768-1.PDF", "output_mit_tabula.csv", output_format="csv", pages='all', multiple_tables=True)
-#df = tabula.read_pdf("iso_documents/ISO1101.PDF", pages='all', multiple_tables=True)
-#print(df)
-
-def file_read(fname):
-    content_array = []
-    with open(fname) as f:
-        # Content_list is the list that contains the read lines.
-        for line in f:
-            content_array.append(line.strip().replace(" ",""))
-        print(content_array)
-
-
-#file_read('drawings/5129275_Rev01-GV12.txt')
-tables = camelot.read_pdf("/home/bscheibel/PycharmProjects/engineering_drawings_extraction/iso_documents/ISO2768-1.PDF", pages="3")
-tables.export('output_mit_camelot.csv', f='csv')
-
-output = subprocess.check_output(["less","/home/bscheibel/PycharmProjects/engineering_drawings_extraction/iso_documents/ISO2768-1.PDF"])
-print(output)
-
-re_data_prefix = re.compile("^[0-9]+[.].*$")
-re_data_fields = re.compile("(([^ ]+[ ]?)+)")
-for line in output.splitlines():
-    if re_data_prefix.match(line):
-        for l in re_data_fields.findall(line):
-            print[l[0].strip()]