Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
113 lines (106 sloc) 4.07 KB
import xlrd
import json
import io
import re
import numpy as np
file_location = "D:/Evision/cleaned.xlsx"
workbook = xlrd.open_workbook(file_location)
sheet = workbook.sheet_by_index(0)
masoutis_desc = []
masoutis_cat1 = []
masoutis_cat2 = []
masoutis_cat3 = []
masoutis_cat4 = []
masoutis_brand = []
for i in range(sheet.nrows):
masoutis_desc.append(sheet.cell_value(i,9))
masoutis_cat1.append(sheet.cell_value(i,1))
masoutis_cat2.append(sheet.cell_value(i,3))
masoutis_cat3.append(sheet.cell_value(i,5))
masoutis_cat4.append(sheet.cell_value(i,7))
masoutis_brand.append(sheet.cell_value(i,10))
with io.open('shot35.json',encoding='utf-8') as f:
a = json.load(f)
my_words=[]
my_words=[]
for item in a['textAnnotations']:
item['description'] = re.sub('ά','α',item['description'])
item['description'] = re.sub('ί','ι',item['description'])
item['description'] = re.sub('ή','η',item['description'])
item['description'] = re.sub('ύ','υ',item['description'])
item['description'] = re.sub('ό','ο',item['description'])
item['description'] = re.sub('ώ','ω',item['description'])
item['description'] = re.sub('έ','ε',item['description'])
my_words.append(item['description'].upper())
# remove words with less than two letters
del my_words[0]
my_words = [i for i in my_words if len(i) > 3]
my_words = [x for x in my_words if not (x.isdigit()
or x[0] == '-' and x[1:].isdigit())]
valid_words=[]
sel_k=[]
cnt_found=[]
cnt=0
for i in range(len(my_words)):
for k in range(len(masoutis_desc)):
new_data=[]
for j in range(len(masoutis_desc[k].split(' '))):
new_data.append(masoutis_desc[k].split(' ')[j])
if (my_words[i] in new_data)==True:
cnt=cnt+1
sel_k.append(k)
valid_words.append(my_words[i])
if cnt!=0:
cnt_found.append(cnt)
cnt=0
valid_words_unq=[]
cnt = 0
for i in range(len(cnt_found)):
valid_words_unq.append(valid_words[cnt])
cnt=cnt+cnt_found[i]
cat2 = [];cat3 = [];cat4= []; desc = []
my_cat2 = [];my_cat3 = []; my_cat4 = []; my_desc=[]
cnt=0
for i in range(len(cnt_found)):
for j in range(cnt_found[i]):
cat2.append([])
cat2[i].append(masoutis_cat2[sel_k[cnt]])
cat3.append([])
cat3[i].append(masoutis_cat3[sel_k[cnt]])
cat4.append([])
cat4[i].append(masoutis_cat4[sel_k[cnt]])
desc.append([])
desc[i].append(masoutis_desc[sel_k[cnt]])
cnt+=1
my_cat2.append(np.unique(cat2[i]))
my_cat3.append(np.unique(cat3[i]))
my_cat4.append(np.unique(cat4[i]))
my_desc.append(np.unique(desc[i]))
my_cat2_1D = [y for x in my_cat2 for y in x]
my_cat2_unq = np.unique(my_cat2_1D)
my_cat3_1D = [y for x in my_cat3 for y in x]
my_cat3_unq = np.unique(my_cat3_1D)
my_cat4_1D = [y for x in my_cat4 for y in x]
my_cat4_unq = np.unique(my_cat4_1D)
my_desc_1D = [y for x in my_desc for y in x]
my_desc_unq = np.unique(my_desc_1D)
count_cat2=[]
for i in range(len(my_cat2_unq)):
count_cat2.append(my_cat2_1D.count(my_cat2_unq[i]))
count_cat3=[]
for i in range(len(my_cat3_unq)):
count_cat3.append(my_cat3_1D.count(my_cat3_unq[i]))
count_cat4=[]
for i in range(len(my_cat4_unq)):
count_cat4.append(my_cat4_1D.count(my_cat4_unq[i]))
count_desc=[]
for i in range(len(my_desc_unq)):
count_desc.append(my_desc_1D.count(my_desc_unq[i]))
count_cat2_sort=sorted(range(len(count_cat2)), key=lambda k: count_cat2[k],reverse=True)
count_cat3_sort=sorted(range(len(count_cat3)), key=lambda k: count_cat3[k],reverse=True)
count_cat4_sort=sorted(range(len(count_cat4)), key=lambda k: count_cat4[k],reverse=True)
count_desc_sort=sorted(range(len(count_desc)), key=lambda k: count_desc[k],reverse=True)
print("Input image is characterized as -", my_cat2_unq[count_cat2_sort[0]],"- in Category2")
print("Input image is characterized as -", my_cat3_unq[count_cat3_sort[0]],"- in Category3")
print("Input image is characterized as -", my_cat4_unq[count_cat4_sort[0]],"- in Category4")
print("Input image is described as -", my_desc_unq[count_desc_sort[0]],"- in Description")
You can’t perform that action at this time.